ViennaCL-1.5.1-src/000755 001750 001750 00000000000 12267307531 013766 5ustar00rupprupp000000 000000
ViennaCL-1.5.1-src/cmake/000755 001750 001750 00000000000 12267307463 015052 5ustar00rupprupp000000 000000
ViennaCL-1.5.1-src/cmake/FindOpenCL.cmake000644 001750 001750 00000004710 12267307463 017777 0ustar00rupprupp000000 000000
# - Find the OpenCL headers and library
#
# Defines the following if found:
#   OPENCL_FOUND        : TRUE if found, FALSE otherwise
#   OPENCL_INCLUDE_DIRS : Include directories for OpenCL
#   OPENCL_LIBRARIES    : The libraries to link against
#
# The user can set the OPENCLROOT environment variable to help finding OpenCL
# if it is installed in a non-standard place.
#
# The vendor SDK variables ATISTREAMSDKROOT, AMDAPPSDKROOT and INTELOCLSDKROOT
# are consulted as well. Each assignment below overrides the previous one, so
# the effective precedence (highest first) is:
#   OPENCLROOT, INTELOCLSDKROOT, AMDAPPSDKROOT, ATISTREAMSDKROOT

set(ENV_ATISTREAMSDKROOT $ENV{ATISTREAMSDKROOT})
if(ENV_ATISTREAMSDKROOT)
  set(ENV_OPENCLROOT $ENV{ATISTREAMSDKROOT})
endif()

set(ENV_AMDAPPSDKROOT $ENV{AMDAPPSDKROOT})
if(ENV_AMDAPPSDKROOT)
  set(ENV_OPENCLROOT $ENV{AMDAPPSDKROOT})
endif()

set(ENV_INTELOCLSDKROOT $ENV{INTELOCLSDKROOT})
if(ENV_INTELOCLSDKROOT)
  set(ENV_OPENCLROOT $ENV{INTELOCLSDKROOT})
endif()

set(ENV_OPENCLROOT2 $ENV{OPENCLROOT})
if(ENV_OPENCLROOT2)
  set(ENV_OPENCLROOT $ENV{OPENCLROOT})
endif()

if(ENV_OPENCLROOT)
  # An SDK root was supplied through the environment: search it in addition to
  # the default locations.
  find_path(
    OPENCL_INCLUDE_DIR
    NAMES CL/cl.h OpenCL/cl.h
    PATHS ${ENV_OPENCLROOT}/include
    #NO_DEFAULT_PATH  #uncomment this if you wish to suppress the use of default paths for OpenCL
    )

  # The SDK layout with per-architecture library folders is only assumed on
  # Linux and Windows. Both operands are quoted so that an empty or unusual
  # CMAKE_SYSTEM_NAME cannot break the if() expression.
  if(("${CMAKE_SYSTEM_NAME}" MATCHES "Linux") OR ("${CMAKE_SYSTEM_NAME}" MATCHES "Windows"))
    if(CMAKE_SIZEOF_VOID_P EQUAL 4)
      set(OPENCL_LIB_SEARCH_PATH
          ${OPENCL_LIB_SEARCH_PATH}
          ${ENV_OPENCLROOT}/lib/x86)
    else()
      set(OPENCL_LIB_SEARCH_PATH
          ${OPENCL_LIB_SEARCH_PATH}
          ${ENV_OPENCLROOT}/lib/x86_64)
    endif()
  endif()

  find_library(
    OPENCL_LIBRARY
    NAMES OpenCL
    PATHS ${OPENCL_LIB_SEARCH_PATH}
    #NO_DEFAULT_PATH  #uncomment this if you wish to suppress the use of default paths for OpenCL
    )
else()
  # No SDK root given: rely on the default search paths.
  find_path(
    OPENCL_INCLUDE_DIR
    NAMES CL/cl.h OpenCL/cl.h
    PATHS ${PROJECT_SOURCE_DIR}  #use the CL/ include folder provided with ViennaCL
    )

  find_library(
    OPENCL_LIBRARY
    NAMES OpenCL
    )
endif()

# The package name passed here must match the name used in
# find_package(OpenCL ...): the helper reads <name>_FIND_REQUIRED and
# <name>_FIND_QUIETLY, so the former all-uppercase "OPENCL" caused REQUIRED and
# QUIET to be ignored. The upper-case result variable OPENCL_FOUND is still set.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
  OpenCL
  DEFAULT_MSG
  OPENCL_LIBRARY OPENCL_INCLUDE_DIR
  )

if(OPENCL_FOUND)
  set(OPENCL_INCLUDE_DIRS ${OPENCL_INCLUDE_DIR})
  set(OPENCL_LIBRARIES ${OPENCL_LIBRARY})
else()
  set(OPENCL_INCLUDE_DIRS)
  set(OPENCL_LIBRARIES)
endif()

mark_as_advanced(
  OPENCL_INCLUDE_DIR
  OPENCL_LIBRARY
  )
ViennaCL-1.5.1-src/cmake/ViennaCLCommon.cmake000644 001750 001750 00000011225 12267307463 020665 0ustar00rupprupp000000 000000
include(CTest)
include(CMakeDependentOption)

# Installation directories
##########################

set(INSTALL_INCLUDE_DIR include CACHE PATH "Installation directory for headers")
if(WIN32 AND NOT CYGWIN)
  set(DEF_INSTALL_CMAKE_DIR CMake)
else()
  set(DEF_INSTALL_CMAKE_DIR lib/cmake/viennacl)
endif()
set(INSTALL_CMAKE_DIR ${DEF_INSTALL_CMAKE_DIR} CACHE PATH "Installation directory for CMake files")

# Make both installation directories absolute, then record their position
# relative to the CMake-files directory; the relative paths are substituted
# into ViennaCLConfig.cmake.
if(NOT IS_ABSOLUTE "${INSTALL_CMAKE_DIR}")
  set(INSTALL_CMAKE_DIR "${CMAKE_INSTALL_PREFIX}/${INSTALL_CMAKE_DIR}")
endif()
file(RELATIVE_PATH CONF_REL_INSTALL_PREFIX "${INSTALL_CMAKE_DIR}" "${CMAKE_INSTALL_PREFIX}")
if(NOT IS_ABSOLUTE "${INSTALL_INCLUDE_DIR}")
  set(INSTALL_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${INSTALL_INCLUDE_DIR}")
endif()
file(RELATIVE_PATH CONF_REL_INCLUDE_DIR "${INSTALL_CMAKE_DIR}" "${INSTALL_INCLUDE_DIR}")

# User options
##############

option(ENABLE_CUDA "Use the CUDA backend" OFF)

option(BUILD_EXAMPLES "Build example programs" ON)

option(ENABLE_OPENCL "Use the OpenCL backend" ON)

option(ENABLE_OPENMP "Use OpenMP acceleration" OFF)

# If you are interested in the impact of different kernel parameters on
# performance, you may want to give ViennaProfiler a try (see
# http://sourceforge.net/projects/viennaprofiler/) Set your connection
# parameters in examples/parameters/common_vprof.hpp accordingly.
cmake_dependent_option(ENABLE_VIENNAPROFILER "Enable examples using ViennaProfiler" OFF BUILD_EXAMPLES OFF)

# If you want to build the examples that use boost::numeric::ublas, enable
# the following:
cmake_dependent_option(ENABLE_UBLAS "Enable examples using uBLAS" OFF BUILD_EXAMPLES OFF)

# If you want to build the examples that use Eigen
cmake_dependent_option(ENABLE_EIGEN "Enable examples that use Eigen" OFF BUILD_EXAMPLES OFF)

# If you want to build the examples that use MTL4
cmake_dependent_option(ENABLE_MTL4 "Enable examples that use MTL4" OFF BUILD_EXAMPLES OFF)

option(ENABLE_PEDANTIC_FLAGS "Enable pedantic compiler flags (GCC and Clang only)" OFF)

mark_as_advanced(BOOSTPATH ENABLE_VIENNAPROFILER ENABLE_EIGEN ENABLE_MTL4 ENABLE_PEDANTIC_FLAGS)

# Find prerequisites
####################

# Boost:
if(BOOSTPATH)
  set(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH} ${BOOSTPATH})
  set(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} "${BOOSTPATH}/lib")
  set(BOOST_ROOT ${BOOSTPATH})
endif()

if(ENABLE_UBLAS OR BUILD_TESTING)
  set(Boost_USE_MULTITHREADED TRUE)
  # First probe without components to learn the Boost version, then request
  # the component set that exists in that version.
  find_package(Boost)
  if(Boost_MINOR_VERSION LESS 34)
    find_package(Boost REQUIRED COMPONENTS thread)
  elseif(Boost_MINOR_VERSION LESS 47)
    find_package(Boost REQUIRED COMPONENTS date_time serialization system thread)
  else()
    find_package(Boost REQUIRED COMPONENTS chrono date_time serialization system thread)
  endif()
endif()

if(ENABLE_CUDA)
  find_package(CUDA REQUIRED)
  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -arch=sm_13 -DVIENNACL_WITH_CUDA)
endif()

if(ENABLE_OPENCL)
  find_package(OpenCL REQUIRED)
endif()

if(ENABLE_OPENMP)
  find_package(OpenMP REQUIRED)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS} -DVIENNACL_WITH_OPENMP")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS} -DVIENNACL_WITH_OPENMP")
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
endif()

if(ENABLE_VIENNAPROFILER)
  find_package(ViennaProfiler REQUIRED)
endif()

if(ENABLE_EIGEN)
  # find Eigen
  find_path(EIGEN_INCLUDE_DIR Eigen/Dense)
  if(NOT EIGEN_INCLUDE_DIR)
    message(SEND_ERROR "Failed to find Eigen")
  endif()
  mark_as_advanced(EIGEN_INCLUDE_DIR)
endif()

if(ENABLE_MTL4)
  # MTL4 comes with a MTLConfig.cmake
  find_package(MTL REQUIRED)
endif()

include_directories(
  ${PROJECT_SOURCE_DIR}
  ${OPENCL_INCLUDE_DIRS})

# Set high warning level on GCC
if(ENABLE_PEDANTIC_FLAGS)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -pedantic")
endif()

# Disable Warning 4996 (std::copy is unsafe ...) on Visual Studio
if(MSVC)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4996")
endif()

# Export
########

configure_file(cmake/FindOpenCL.cmake
  ${PROJECT_BINARY_DIR}/FindOpenCL.cmake COPYONLY)

configure_file(cmake/ViennaCLConfig.cmake.in
  ${PROJECT_BINARY_DIR}/ViennaCLConfig.cmake @ONLY)

configure_file(cmake/ViennaCLConfigVersion.cmake.in
  ${PROJECT_BINARY_DIR}/ViennaCLConfigVersion.cmake @ONLY)

# export(PACKAGE ...) was introduced with CMake 2.8.0. Compare the complete
# version number: looking at CMAKE_MINOR_VERSION alone skipped the export on
# every later major release whose minor version is 6 or lower (e.g. CMake 3.5).
if(NOT "${CMAKE_VERSION}" VERSION_LESS "2.8.0")
  export(PACKAGE ViennaCL)
endif()

# Install
#########

install(FILES
  ${PROJECT_BINARY_DIR}/FindOpenCL.cmake
  ${PROJECT_BINARY_DIR}/ViennaCLConfig.cmake
  ${PROJECT_BINARY_DIR}/ViennaCLConfigVersion.cmake
  DESTINATION ${INSTALL_CMAKE_DIR} COMPONENT dev)
ViennaCL-1.5.1-src/cmake/ViennaCLConfigVersion.cmake.in000644 001750 001750 00000000652 12267307463 022617 0ustar00rupprupp000000 000000
# Package version file, loaded by find_package(ViennaCL [version] [EXACT]).
# find_package() provides PACKAGE_FIND_VERSION; this file answers with
# PACKAGE_VERSION_COMPATIBLE and PACKAGE_VERSION_EXACT.
set(PACKAGE_VERSION "@VERSION@")

# The installed package satisfies a request when it is at least as new as the
# requested version. (The comparison used to be the wrong way round, so that
# requests for an OLDER version were rejected and requests for a NEWER version
# were accepted.)
if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}")
  set(PACKAGE_VERSION_COMPATIBLE FALSE)
else()
  set(PACKAGE_VERSION_COMPATIBLE TRUE)
  # find_package() consults PACKAGE_VERSION_EXACT when EXACT was requested.
  if("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}")
    set(PACKAGE_VERSION_EXACT TRUE)
  endif()
endif()
ViennaCL-1.5.1-src/cmake/FindMTL.cmake000644 001750 001750 00000000617 12267307463 017315 0ustar00rupprupp000000 000000
#SET(MTL_INCLUDE_DIRS "${MTL_DIR}/../../include")

# Boost headers are added to the MTL include directories.
find_package(Boost 1.36 REQUIRED)
if(Boost_FOUND)
  list(APPEND MTL_INCLUDE_DIRS ${Boost_INCLUDE_DIRS})
endif()

# find MTL
find_path(MTL_INCLUDE_DIR boost/numeric/itl)
if(NOT MTL_INCLUDE_DIR)
  message(SEND_ERROR "Failed to find MTL")
endif()
mark_as_advanced(MTL_INCLUDE_DIR)

include_directories(${MTL_INCLUDE_DIRS} ${MTL_INCLUDE_DIR})
ViennaCL-1.5.1-src/cmake/ViennaCLConfig.cmake.in000644 001750 001750 00000002423 12267307463 021247 0ustar00rupprupp000000 000000
#- CMake configuration file for ViennaCL
# Sets the following variables if ViennaCL was found:
#  VIENNACL_FOUND         : TRUE if found
#  VIENNACL_INCLUDE_DIRS  : Include-directories to be used
#  VIENNACL_LIBRARIES     : Libraries to link against
# Although ViennaCL is a headers-only library, it still requires an OpenCL
# implementation, which is why it is necessary to link against a library.
# Compute paths
get_filename_component(VIENNACL_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
if(EXISTS "${VIENNACL_CMAKE_DIR}/CMakeCache.txt")
   # in build tree
   get_filename_component(VIENNACL_INSTALL_PREFIX "${VIENNACL_CMAKE_DIR}" PATH)
   # The headers of a build tree live in the source tree. The path is
   # substituted when this file is configured. (This used to read the
   # variable _VIENNACL_PREFIX, which is never defined, so the include
   # directory came out empty.)
   set(VIENNACL_INCLUDE_DIR "@PROJECT_SOURCE_DIR@")
else()
   # installed: both paths are stored relative to the directory of this file
   set(VIENNACL_INSTALL_PREFIX "${VIENNACL_CMAKE_DIR}/@CONF_REL_INSTALL_PREFIX@")
   set(VIENNACL_INCLUDE_DIR "${VIENNACL_CMAKE_DIR}/@CONF_REL_INCLUDE_DIR@")
endif()

# Find OpenCL
# Temporarily put the directory of this file first on the module path so that
# the FindOpenCL.cmake located next to it is used; the caller's module path is
# restored afterwards.
set(_VIENNACL_CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH}")
list(INSERT CMAKE_MODULE_PATH 0 "${VIENNACL_CMAKE_DIR}")
if(ViennaCL_FIND_REQUIRED)
   find_package(OpenCL QUIET REQUIRED)
else()
   find_package(OpenCL QUIET)
endif()
set(CMAKE_MODULE_PATH "${_VIENNACL_CMAKE_MODULE_PATH}")

# Set up variables
set(VIENNACL_INCLUDE_DIRS ${VIENNACL_INCLUDE_DIR} ${OPENCL_INCLUDE_DIRS})
set(VIENNACL_LIBRARIES ${OPENCL_LIBRARIES})
ViennaCL-1.5.1-src/cmake/FindViennaProfiler.cmake000644 001750 001750 00000002416 12267307463 021603 0ustar00rupprupp000000 000000
# - Find the headers and libraries required by ViennaProfiler
#
# Defines the following if found:
#   VIENNAPROFILER_FOUND        : TRUE if found, FALSE otherwise
#   VIENNAPROFILER_INCLUDE_DIRS : Include directories for ViennaProfiler, MySQL and MySQL++
#   VIENNAPROFILER_LIBRARIES    : The libraries to link against

# first find MySQL
find_path(MYSQL_INCLUDE_DIR mysql.h PATH_SUFFIXES mysql)
find_library(MYSQL_LIBRARY mysqlclient)

# now find MySQL++
# (only attempted once MySQL itself was located; the parent directory of the
# MySQL include directory serves as a search hint)
if(MYSQL_INCLUDE_DIR AND MYSQL_LIBRARY)
  get_filename_component(_MYSQLPPROOT "${MYSQL_INCLUDE_DIR}" PATH)
  find_path(MYSQLPP_INCLUDE_DIR mysql++/mysql++.h HINTS "${_MYSQLPPROOT}/include")
  find_library(MYSQLPP_LIBRARY mysqlpp HINTS "${_MYSQLPPROOT}/lib")
endif()

# then find ViennaProfiler
find_path(VIENNAPROFILER_INCLUDE_DIR viennaprofiler/profiler.hpp)

mark_as_advanced(MYSQL_INCLUDE_DIR MYSQL_LIBRARY MYSQLPP_INCLUDE_DIR
  MYSQLPP_LIBRARY VIENNAPROFILER_INCLUDE_DIR)

# The second argument of the classic signature is the failure message.
# DEFAULT_MSG was missing, so VIENNAPROFILER_INCLUDE_DIR was consumed as the
# message and never verified: the package was reported as found even without
# the ViennaProfiler headers.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(ViennaProfiler
  DEFAULT_MSG
  VIENNAPROFILER_INCLUDE_DIR
  MYSQLPP_INCLUDE_DIR MYSQLPP_LIBRARY
  MYSQL_INCLUDE_DIR MYSQL_LIBRARY)

set(VIENNAPROFILER_INCLUDE_DIRS
  "${VIENNAPROFILER_INCLUDE_DIR}" "${MYSQL_INCLUDE_DIR}" "${MYSQLPP_INCLUDE_DIR}")
set(VIENNAPROFILER_LIBRARIES "${MYSQL_LIBRARY}" "${MYSQLPP_LIBRARY}")
ViennaCL-1.5.1-src/cmake/copymanual.cmake000644 001750 001750 00000000320 12267307463 020217 0ustar00rupprupp000000 000000
# Copies the LaTeX sources of the manual (*.tex, *.bst, *.bib) from SRC to DST.
# SRC and DST are expected to be defined by the caller.
file (COPY ${SRC} DESTINATION ${DST} FILES_MATCHING PATTERN "*.tex")
file (COPY ${SRC} DESTINATION ${DST} FILES_MATCHING PATTERN "*.bst")
file (COPY ${SRC} DESTINATION ${DST} FILES_MATCHING PATTERN "*.bib")
ViennaCL-1.5.1-src/doc/000755 001750 001750 00000000000 12267307531 014533 5ustar00rupprupp000000 000000
ViennaCL-1.5.1-src/doc/Doxyfile.in000644 001750 001750 00000175311 12267307531 016656 0ustar00rupprupp000000 000000
# Doxyfile 1.5.8

# This file describes the settings to be used by the documentation system
# doxygen (www.doxygen.org) for a project
#
# All text after a hash (#) is considered a comment and will be ignored
# The format is:
# TAG = value [value, ...]
# For lists items can also be appended using:
# TAG += value [value, ...]
# Values that contain spaces should be placed between quotes (" ")

#---------------------------------------------------------------------------
# Project related configuration options
#---------------------------------------------------------------------------

# This tag specifies the encoding used for all characters in the config file
# that follow. The default is UTF-8 which is also the encoding used for all
# text before the first occurrence of this tag. Doxygen uses libiconv (or the
# iconv built into libc) for the transcoding. See
# http://www.gnu.org/software/libiconv for the list of possible encodings.

DOXYFILE_ENCODING = UTF-8

# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
# by quotes) that should identify the project.
PROJECT_NAME = "ViennaCL - The Vienna Computing Library" # The PROJECT_NUMBER tag can be used to enter a project or revision number. # This could be handy for archiving the generated documentation or # if some version control system is used. PROJECT_NUMBER = 1.5.1 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. # If a relative path is entered, it will be relative to the location # where doxygen was started. If left blank the current directory will be used. OUTPUT_DIRECTORY = doxygen/ # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create # 4096 sub-directories (in 2 levels) under the output directory of each output # format and will distribute the generated files over these directories. # Enabling this option can be useful when feeding doxygen a huge amount of # source files, where putting all generated files in the same directory would # otherwise cause performance problems for the file system. CREATE_SUBDIRS = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # The default language is English, other supported languages are: # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, # Croatian, Czech, Danish, Dutch, Farsi, Finnish, French, German, Greek, # Hungarian, Italian, Japanese, Japanese-en (Japanese with English messages), # Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, Polish, # Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, Slovene, # Spanish, Swedish, and Ukrainian. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will # include brief member descriptions after the members that are listed in # the file and class documentation (similar to JavaDoc). # Set to NO to disable this. 
BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend # the brief description of a member or function before the detailed description. # Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator # that is used to form the text in various listings. Each string # in this list, if found as the leading text of the brief description, will be # stripped from the text and the result after processing the whole list, is # used as the annotated text. Otherwise, the brief description is used as-is. # If left blank, the following values are used ("$name" is automatically # replaced with the name of the entity): "The $name class" "The $name widget" # "The $name file" "is" "provides" "specifies" "contains" # "represents" "a" "an" "the" ABBREVIATE_BRIEF = "The $name class" \ "The $name widget" \ "The $name file" \ is \ provides \ specifies \ contains \ represents \ a \ an \ the # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # Doxygen will generate a detailed section even if there is only a brief # description. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full # path before files name in the file list and in the header files. If set # to NO the shortest path that makes the file name unique will be used. FULL_PATH_NAMES = YES # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag # can be used to strip a user-defined part of the path. 
Stripping is # only done if one of the specified strings matches the left-hand part of # the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the # path to strip. STRIP_FROM_PATH = /home/rupp/development/ViennaCL/viennacl-dev # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of # the path mentioned in the documentation of a class, which tells # the reader which header file to include in order to use a class. # If left blank only the name of the header file containing the class # definition is used. Otherwise one should specify the include paths that # are normally passed to the compiler using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter # (but less readable) file names. This can be useful is your file systems # doesn't support long names like on DOS, Mac, or CD-ROM. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen # will interpret the first line (until the first dot) of a JavaDoc-style # comment as the brief description. If set to NO, the JavaDoc # comments will behave just like regular Qt-style comments # (thus requiring an explicit @brief command for a brief description.) JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then Doxygen will # interpret the first line (until the first dot) of a Qt-style # comment as the brief description. If set to NO, the comments # will behave just like regular Qt-style comments (thus requiring # an explicit \brief command for a brief description.) QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen # treat a multi-line C++ special comment block (i.e. a block of //! or /// # comments) as a brief description. This used to be the default behaviour. # The new default is to treat a multi-line C++ comment block as a detailed # description. 
Set this tag to YES if you prefer the old behaviour instead. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented # member inherits the documentation from any documented member that it # re-implements. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce # a new page for each member. If set to NO, the documentation of a member will # be part of the file/class/namespace that contains it. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. # Doxygen uses this value to replace tabs by spaces in code fragments. TAB_SIZE = 8 # This tag can be used to specify a number of aliases that acts # as commands in the documentation. An alias has the form "name=value". # For example adding "sideeffect=\par Side Effects:\n" will allow you to # put the command \sideeffect (or @sideeffect) in the documentation, which # will result in a user-defined paragraph with heading "Side Effects:". # You can put \n's in the value part of an alias to insert newlines. ALIASES = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C # sources only. Doxygen will then generate output that is more tailored for C. # For instance, some of the names that are used will be different. The list # of all members will be omitted, etc. OPTIMIZE_OUTPUT_FOR_C = YES # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java # sources only. Doxygen will then generate output that is more tailored for # Java. For instance, namespaces will be presented as packages, qualified # scopes will look different, etc. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources only. Doxygen will then generate output that is more tailored for # Fortran. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. 
Doxygen will then generate output that is tailored for # VHDL. OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it parses. # With this tag you can assign which parser to use for a given extension. # Doxygen has a built-in mapping, but you can override or extend it using this tag. # The format is ext=language, where ext is a file extension, and language is one of # the parsers supported by doxygen: IDL, Java, Javascript, C#, C, C++, D, PHP, # Objective-C, Python, Fortran, VHDL, C, C++. For instance to make doxygen treat # .inc files as Fortran files (default is PHP), and .f files as C (default is Fortran), # use: inc=Fortran f=C EXTENSION_MAPPING = # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should # set this tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); v.s. # func(std::string) {}). This also make the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. # Doxygen will parse them like normal C++ but will assume all classes use public # instead of private inheritance when no explicit protection keyword is present. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate getter # and setter methods for a property. Setting this option to YES (the default) # will make doxygen to replace the get and set methods by a property in the # documentation. This will only work if the methods are indeed getting or # setting a simple type. 
If this is not the case, or you want to show the # methods anyway, you should set this option to NO. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES, then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. DISTRIBUTE_GROUP_DOC = NO # Set the SUBGROUPING tag to YES (the default) to allow class member groups of # the same type (for instance a group of public functions) to be put as a # subgroup of that type (e.g. under the Public Functions section). Set it to # NO to prevent subgrouping. Alternatively, this can be done per class using # the \nosubgrouping command. SUBGROUPING = YES # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum # is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically # be useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. TYPEDEF_HIDES_STRUCT = NO # The SYMBOL_CACHE_SIZE determines the size of the internal cache use to # determine which symbols to keep in memory and which to flush to disk. # When the cache is full, less often used symbols will be written to disk. # For small to medium size projects (<1000 input files) the default value is # probably good enough. For larger projects a too small cache size can cause # doxygen to be busy swapping symbols to and from disk most of the time # causing a significant performance penality. # If the system has enough physical memory increasing the cache will improve the # performance by keeping more symbols in memory. 
Note that the value works on # a logarithmic scale so increasing the size by one will roughly double the # memory usage. The cache size is given by this formula: # 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, # corresponding to a cache size of 2^16 = 65536 symbols SYMBOL_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in # documentation are documented, even if no documentation was available. # Private class members and static file members will be hidden unless # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES all private members of a class # will be included in the documentation. EXTRACT_PRIVATE = NO # If the EXTRACT_STATIC tag is set to YES all static members of a file # will be included in the documentation. EXTRACT_STATIC = NO # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) # defined locally in source files will be included in the documentation. # If set to NO only classes defined in header files are included. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. When set to YES local # methods, which are defined in the implementation section but not in # the interface are included in the documentation. # If set to NO (the default) only methods in the interface are included. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base # name of the file that contains the anonymous namespace. By default # anonymous namespaces are hidden. 
EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all # undocumented members of documented classes, files or namespaces. # If set to NO (the default) these members will be included in the # various overviews, but no documentation section is generated. # This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. # If set to NO (the default) these classes will be included in the various # overviews. This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all # friend (class|struct|union) declarations. # If set to NO (the default) these declarations will be included in the # documentation. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any # documentation blocks found inside the body of a function. # If set to NO (the default) these blocks will be appended to the # function's detailed documentation block. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation # that is typed after a \internal command is included. If the tag is set # to NO (the default) then the documentation will be excluded. # Set it to YES to include the internal documentation. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate # file names in lower-case letters. If set to YES upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. CASE_SENSE_NAMES = NO # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen # will show members with their full class and namespace scopes in the # documentation. 
If set to YES the scope will be hidden. HIDE_SCOPE_NAMES = YES # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen # will put a list of the files that are included by a file in the documentation # of that file. SHOW_INCLUDE_FILES = YES # If the INLINE_INFO tag is set to YES (the default) then a tag [inline] # is inserted in the documentation for inline members. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen # will sort the (detailed) documentation of file and class members # alphabetically by member name. If set to NO the members will appear in # declaration order. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the # brief documentation of file, namespace and class members alphabetically # by member name. If set to NO (the default) the members will appear in # declaration order. SORT_BRIEF_DOCS = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the # hierarchy of group names into alphabetical order. If set to NO (the default) # the group names will appear in their defined order. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be # sorted by fully-qualified names, including namespaces. If set to # NO (the default), the class list will be sorted only by class name, # not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the # alphabetical list. SORT_BY_SCOPE_NAME = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or # disable (NO) the todo list. This list is created by putting \todo # commands in the documentation. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or # disable (NO) the test list. This list is created by putting \test # commands in the documentation. 
GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or # disable (NO) the bug list. This list is created by putting \bug # commands in the documentation. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or # disable (NO) the deprecated list. This list is created by putting # \deprecated commands in the documentation. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional # documentation sections, marked by \if sectionname ... \endif. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines # the initial value of a variable or define consists of for it to appear in # the documentation. If the initializer consists of more lines than specified # here it will be hidden. Use a value of 0 to hide initializers completely. # The appearance of the initializer of individual variables and defines in the # documentation can be controlled using \showinitializer or \hideinitializer # command in the documentation regardless of this setting. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated # at the bottom of the documentation of classes and structs. If set to YES the # list will mention the files that were used to generate the documentation. SHOW_USED_FILES = YES # If the sources in your project are distributed over multiple directories # then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy # in the documentation. The default is NO. SHOW_DIRECTORIES = NO # Set the SHOW_FILES tag to NO to disable the generation of the Files page. # This will remove the Files entry from the Quick Index and from the # Folder Tree View (if specified). The default is YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the # Namespaces page. This will remove the Namespaces entry from the Quick Index # and from the Folder Tree View (if specified). 
The default is YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command <command> <input-file>, where <command> is the value of # the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file # provided by doxygen. Whatever the program writes to standard output # is used as the file version. See the manual for examples. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by # doxygen. The layout file controls the global structure of the generated output files # in an output format independent way. To create the layout file that represents # doxygen's defaults, run doxygen with the -l option. You can optionally specify a # file name after the option, if omitted DoxygenLayout.xml will be used as the name # of the layout file. LAYOUT_FILE = #--------------------------------------------------------------------------- # configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated # by doxygen. Possible values are YES and NO. If left blank NO is used. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated by doxygen. Possible values are YES and NO. If left blank # NO is used. WARNINGS = YES # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings # for undocumented members. If EXTRACT_ALL is set to YES then this flag will # automatically be disabled. 
WARN_IF_UNDOCUMENTED = YES # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some # parameters in a documented function, or documenting parameters that # don't exist or using markup commands wrongly. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for # functions that are documented, but have no documentation for their parameters # or return value. If set to NO (the default) doxygen will only warn about # wrong or incomplete parameter documentation, but not about the absence of # documentation. WARN_NO_PARAMDOC = NO # The WARN_FORMAT tag determines the format of the warning messages that # doxygen can produce. The string should contain the $file, $line, and $text # tags, which will be replaced by the file and line number from which the # warning originated and the warning text. Optionally the format may contain # $version, which will be replaced by the version of the file (if it could # be obtained via FILE_VERSION_FILTER) WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning # and error messages should be written. If left blank the output is written # to stderr. WARN_LOGFILE = #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag can be used to specify the files and/or directories that contain # documented source files. You may enter file names like "myfile.cpp" or # directories like "/usr/src/myproject". Separate the files or directories # with spaces. INPUT = ../../viennacl # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is # also the default input encoding. 
Doxygen uses libiconv (or the iconv built # into libc) for the transcoding. See http://www.gnu.org/software/libiconv for # the list of possible encodings. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank the following patterns are tested: # *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx # *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 FILE_PATTERNS = *.c \ *.cc \ *.cxx \ *.cpp \ *.c++ \ *.d \ *.java \ *.ii \ *.ixx \ *.ipp \ *.i++ \ *.inl \ *.h \ *.hh \ *.hxx \ *.hpp \ *.h++ \ *.idl \ *.odl \ *.cs \ *.php \ *.php3 \ *.inc \ *.m \ *.mm \ *.dox \ *.py \ *.f90 \ *.f \ *.vhd \ *.vhdl # The RECURSIVE tag can be used to specify whether or not subdirectories # should be searched for input files as well. Possible values are YES and NO. # If left blank NO is used. RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix filesystem feature) are excluded # from the input. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. Note that the wildcards are matched # against the file with absolute path, so to exclude all test directories # for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. 
The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or # directories that contain example code fragments that are included (see # the \include command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank all files are included. EXAMPLE_PATTERNS = * # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude # commands irrespective of the value of the RECURSIVE tag. # Possible values are YES and NO. If left blank NO is used. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or # directories that contain images that are included in the documentation (see # the \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command <filter> <input-file>, where <filter> # is the value of the INPUT_FILTER tag, and <input-file> is the name of an # input file. Doxygen will then use the output that the filter program writes # to standard output. If FILTER_PATTERNS is specified, this tag will be # ignored. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further # info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER # is applied to all files. 
FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will be used to filter the input files when producing source # files to browse (i.e. when SOURCE_BROWSER is set to YES). FILTER_SOURCE_FILES = NO #--------------------------------------------------------------------------- # configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will # be generated. Documented entities will be cross-referenced with these sources. # Note: To get rid of all source code in the generated output, make sure also # VERBATIM_HEADERS is set to NO. SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body # of functions and classes directly in the documentation. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct # doxygen to hide any special comment blocks from generated source code # fragments. Normal C and C++ comments will always remain visible. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES # then for each documented function all documented # functions referencing it will be listed. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES # then for each documented function all documented entities # called/used by that function will be listed. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES (the default) # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will # link to the source code. Otherwise they will link to the documentation. REFERENCES_LINK_SOURCE = YES # If the USE_HTAGS tag is set to YES then the references to source code # will point to the HTML generated by the htags(1) tool instead of doxygen # built-in source browser. 
The htags tool is part of GNU's global source # tagging system (see http://www.gnu.org/software/global/global.html). You # will need version 4.8.6 or higher. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen # will generate a verbatim copy of the header file for each class for # which an include is specified. Set to NO to disable this. VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index # of all compounds will be generated. Enable this if the project # contains a lot of classes, structs, unions or interfaces. ALPHABETICAL_INDEX = NO # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns # in which this list will be split (can be a number in the range [1..20]) COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all # classes will be put under the same header in the alphabetical index. # The IGNORE_PREFIX tag can be used to specify one or more prefixes that # should be ignored while generating the index headers. IGNORE_PREFIX = #--------------------------------------------------------------------------- # configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES (the default) Doxygen will # generate HTML output. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `html' will be used as the default path. 
HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for # each generated HTML page (for example: .htm,.php,.asp). If it is left blank # doxygen will generate files with .html extension. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a personal HTML header for # each generated HTML page. If it is left blank doxygen will generate a # standard header. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a personal HTML footer for # each generated HTML page. If it is left blank doxygen will generate a # standard footer. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading # style sheet that is used by each HTML page. It can be used to # fine-tune the look of the HTML output. If the tag is left blank doxygen # will generate a default style sheet. Note that doxygen will try to copy # the style sheet file to the HTML output directory, so don't put your own # stylesheet in the HTML output directory as well, or it will be erased! HTML_STYLESHEET = # If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, # files or namespaces will be aligned in HTML using tables. If set to # NO a bullet list will be used. HTML_ALIGN_MEMBERS = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. For this to work a browser that supports # JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox # Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). HTML_DYNAMIC_SECTIONS = NO # If the GENERATE_DOCSET tag is set to YES, additional index files # will be generated that can be used as input for Apple's Xcode 3 # integrated development environment, introduced with OSX 10.5 (Leopard). # To create a documentation set, doxygen will generate a Makefile in the # HTML output directory. 
Running make will produce the docset in that # directory and running "make install" will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find # it at startup. # See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information. GENERATE_DOCSET = NO # When GENERATE_DOCSET tag is set to YES, this tag determines the name of the # feed. A documentation feed provides an umbrella under which multiple # documentation sets from a single provider (such as a company or product suite) # can be grouped. DOCSET_FEEDNAME = "Doxygen generated docs" # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that # should uniquely identify the documentation set bundle. This should be a # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen # will append .docset to the name. DOCSET_BUNDLE_ID = org.doxygen.Project # If the GENERATE_HTMLHELP tag is set to YES, additional index files # will be generated that can be used as input for tools like the # Microsoft HTML help workshop to generate a compiled HTML help file (.chm) # of the generated HTML documentation. GENERATE_HTMLHELP = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can # be used to specify the file name of the resulting .chm file. You # can add a path in front of the file if the result should not be # written to the html output directory. CHM_FILE = # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can # be used to specify the location (absolute path including file name) of # the HTML help compiler (hhc.exe). If non-empty doxygen will try to run # the HTML help compiler on the generated index.hhp. HHC_LOCATION = # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag # controls if a separate .chi index file is generated (YES) or that # it should be included in the master .chm file (NO). 
GENERATE_CHI = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING # is used to encode HtmlHelp index (hhk), content (hhc) and project file # content. CHM_INDEX_ENCODING = # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag # controls whether a binary table of contents is generated (YES) or a # normal table of contents (NO) in the .chm file. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members # to the contents of the HTML help documentation and to the tree view. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER # are set, an additional index file will be generated that can be used as input for # Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated # HTML documentation. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can # be used to specify the file name of the resulting .qch file. # The path specified is relative to the HTML output folder. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating # Qt Help Project output. For more information please see # http://doc.trolltech.com/qthelpproject.html#namespace QHP_NAMESPACE = # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating # Qt Help Project output. For more information please see # http://doc.trolltech.com/qthelpproject.html#virtual-folders QHP_VIRTUAL_FOLDER = doc # If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add. # For more information please see # http://doc.trolltech.com/qthelpproject.html#custom-filters QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the custom filter to add. For more information please see # Qt Help Project / Custom Filters. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's # filter section matches. 
# Qt Help Project / Filter Attributes. QHP_SECT_FILTER_ATTRS = # If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can # be used to specify the location of Qt's qhelpgenerator. # If non-empty doxygen will try to run qhelpgenerator on the generated # .qhp file. QHG_LOCATION = # The DISABLE_INDEX tag can be used to turn on/off the condensed index at # top of each HTML page. The value NO (the default) enables the index and # the value YES disables it. DISABLE_INDEX = NO # This tag can be used to set the number of enum values (range [1..20]) # that doxygen will group on one line in the generated HTML documentation. ENUM_VALUES_PER_LINE = 4 # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. # If the tag value is set to FRAME, a side panel will be generated # containing a tree-like index structure (just like the one that # is generated for HTML Help). For this to work a browser that supports # JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, # Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are # probably better off using the HTML help feature. Other possible values # for this tag are: HIERARCHIES, which will generate the Groups, Directories, # and Class Hierarchy pages using a tree view instead of an ordered list; # ALL, which combines the behavior of FRAME and HIERARCHIES; and NONE, which # disables this behavior completely. For backwards compatibility with previous # releases of Doxygen, the values YES and NO are equivalent to FRAME and NONE # respectively. GENERATE_TREEVIEW = NONE # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be # used to set the initial width (in pixels) of the frame in which the tree # is shown. TREEVIEW_WIDTH = 250 # Use this tag to change the font size of Latex formulas included # as images in the HTML documentation. The default is 10. 
Note that # when you change the font size after a successful doxygen run you need # to manually remove any form_*.png images from the HTML output directory # to force them to be regenerated. FORMULA_FONTSIZE = 10 #--------------------------------------------------------------------------- # configuration options related to the LaTeX output #--------------------------------------------------------------------------- # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will # generate Latex output. GENERATE_LATEX = NO # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `latex' will be used as the default path. LATEX_OUTPUT = latex # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be # invoked. If left blank `latex' will be used as the default command name. LATEX_CMD_NAME = latex # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to # generate index for LaTeX. If left blank `makeindex' will be used as the # default command name. MAKEINDEX_CMD_NAME = makeindex # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact # LaTeX documents. This may be useful for small projects and may help to # save some trees in general. COMPACT_LATEX = NO # The PAPER_TYPE tag can be used to set the paper type that is used # by the printer. Possible values are: a4, a4wide, letter, legal and # executive. If left blank a4wide will be used. PAPER_TYPE = a4wide # The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX # packages that should be included in the LaTeX output. EXTRA_PACKAGES = # The LATEX_HEADER tag can be used to specify a personal LaTeX header for # the generated latex document. The header should contain everything until # the first chapter. If it is left blank doxygen will generate a # standard header. Notice: only use this tag if you know what you are doing! 
LATEX_HEADER = # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated # is prepared for conversion to pdf (using ps2pdf). The pdf file will # contain links (just like the HTML output) instead of page references # This makes the output suitable for online browsing using a pdf viewer. PDF_HYPERLINKS = YES # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of # plain latex in the generated Makefile. Set this option to YES to get a # higher quality PDF documentation. USE_PDFLATEX = YES # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. # command to the generated LaTeX files. This will instruct LaTeX to keep # running if errors occur, instead of asking the user for help. # This option is also used when generating formulas in HTML. LATEX_BATCHMODE = NO # If LATEX_HIDE_INDICES is set to YES then doxygen will not # include the index chapters (such as File Index, Compound Index, etc.) # in the output. LATEX_HIDE_INDICES = NO #--------------------------------------------------------------------------- # configuration options related to the RTF output #--------------------------------------------------------------------------- # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output # The RTF output is optimized for Word 97 and may not look very pretty with # other RTF readers or editors. GENERATE_RTF = NO # The RTF_OUTPUT tag is used to specify where the RTF docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `rtf' will be used as the default path. RTF_OUTPUT = rtf # If the COMPACT_RTF tag is set to YES Doxygen generates more compact # RTF documents. This may be useful for small projects and may help to # save some trees in general. COMPACT_RTF = NO # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated # will contain hyperlink fields. 
The RTF file will # contain links (just like the HTML output) instead of page references. # This makes the output suitable for online browsing using WORD or other # programs which support those fields. # Note: wordpad (write) and others do not support links. RTF_HYPERLINKS = NO # Load stylesheet definitions from file. Syntax is similar to doxygen's # config file, i.e. a series of assignments. You only have to provide # replacements, missing definitions are set to their default value. RTF_STYLESHEET_FILE = # Set optional variables used in the generation of an rtf document. # Syntax is similar to doxygen's config file. RTF_EXTENSIONS_FILE = #--------------------------------------------------------------------------- # configuration options related to the man page output #--------------------------------------------------------------------------- # If the GENERATE_MAN tag is set to YES (the default) Doxygen will # generate man pages GENERATE_MAN = NO # The MAN_OUTPUT tag is used to specify where the man pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `man' will be used as the default path. MAN_OUTPUT = man # The MAN_EXTENSION tag determines the extension that is added to # the generated man pages (default is the subroutine's section .3) MAN_EXTENSION = .3 # If the MAN_LINKS tag is set to YES and Doxygen generates man output, # then it will generate one additional man file for each entity # documented in the real man page(s). These additional files # only source the real man page, but without them the man command # would be unable to find the correct page. The default is NO. 
MAN_LINKS = NO #--------------------------------------------------------------------------- # configuration options related to the XML output #--------------------------------------------------------------------------- # If the GENERATE_XML tag is set to YES Doxygen will # generate an XML file that captures the structure of # the code including all documentation. GENERATE_XML = NO # The XML_OUTPUT tag is used to specify where the XML pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `xml' will be used as the default path. XML_OUTPUT = xml # The XML_SCHEMA tag can be used to specify an XML schema, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_SCHEMA = # The XML_DTD tag can be used to specify an XML DTD, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_DTD = # If the XML_PROGRAMLISTING tag is set to YES Doxygen will # dump the program listings (including syntax highlighting # and cross-referencing information) to the XML output. Note that # enabling this will significantly increase the size of the XML output. XML_PROGRAMLISTING = YES #--------------------------------------------------------------------------- # configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will # generate an AutoGen Definitions (see autogen.sf.net) file # that captures the structure of the code including all # documentation. Note that this feature is still experimental # and incomplete at the moment. 
GENERATE_AUTOGEN_DEF = NO #--------------------------------------------------------------------------- # configuration options related to the Perl module output #--------------------------------------------------------------------------- # If the GENERATE_PERLMOD tag is set to YES Doxygen will # generate a Perl module file that captures the structure of # the code including all documentation. Note that this # feature is still experimental and incomplete at the # moment. GENERATE_PERLMOD = NO # If the PERLMOD_LATEX tag is set to YES Doxygen will generate # the necessary Makefile rules, Perl scripts and LaTeX code to be able # to generate PDF and DVI output from the Perl module output. PERLMOD_LATEX = NO # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be # nicely formatted so it can be parsed by a human reader. This is useful # if you want to understand what is going on. On the other hand, if this # tag is set to NO the size of the Perl module output will be much smaller # and Perl will parse it just the same. PERLMOD_PRETTY = YES # The names of the make variables in the generated doxyrules.make file # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. # This is useful so different doxyrules.make files included by the same # Makefile don't overwrite each other's variables. PERLMOD_MAKEVAR_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the preprocessor #--------------------------------------------------------------------------- # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will # evaluate all C-preprocessor directives found in the sources and include # files. ENABLE_PREPROCESSING = YES # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro # names in the source code. If set to NO (the default) only conditional # compilation will be performed. 
Macro expansion can be done in a controlled # way by setting EXPAND_ONLY_PREDEF to YES. MACRO_EXPANSION = NO # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES # then the macro expansion is limited to the macros specified with the # PREDEFINED and EXPAND_AS_DEFINED tags. EXPAND_ONLY_PREDEF = NO # If the SEARCH_INCLUDES tag is set to YES (the default) the include files # in the INCLUDE_PATH (see below) will be searched if a #include is found. SEARCH_INCLUDES = YES # The INCLUDE_PATH tag can be used to specify one or more directories that # contain include files that are not input files but should be processed by # the preprocessor. INCLUDE_PATH = # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard # patterns (like *.h and *.hpp) to filter out the header-files in the # directories. If left blank, the patterns specified with FILE_PATTERNS will # be used. INCLUDE_FILE_PATTERNS = # The PREDEFINED tag can be used to specify one or more macro names that # are defined before the preprocessor is started (similar to the -D option of # gcc). The argument of the tag is a list of macros of the form: name # or name=definition (no spaces). If the definition and the = are # omitted =1 is assumed. To prevent a macro definition from being # undefined via #undef or recursively expanded use the := operator # instead of the = operator. PREDEFINED = # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then # this tag can be used to specify a list of macro names that should be expanded. # The macro definition that is found in the sources will be used. # Use the PREDEFINED tag if you want to use a different macro definition. EXPAND_AS_DEFINED = # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then # doxygen's preprocessor will remove all function-like macros that are alone # on a line, have an all uppercase name, and do not end with a semicolon. 
Such # function macros are typically used for boiler-plate code, and will confuse # the parser if not removed. SKIP_FUNCTION_MACROS = YES #--------------------------------------------------------------------------- # Configuration::additions related to external references #--------------------------------------------------------------------------- # The TAGFILES option can be used to specify one or more tagfiles. # Optionally an initial location of the external documentation # can be added for each tagfile. The format of a tag file without # this location is as follows: # TAGFILES = file1 file2 ... # Adding location for the tag files is done as follows: # TAGFILES = file1=loc1 "file2 = loc2" ... # where "loc1" and "loc2" can be relative or absolute paths or # URLs. If a location is present for each tag, the installdox tool # does not have to be run to correct the links. # Note that each tag file must have a unique name # (where the name does NOT include the path) # If a tag file is not located in the directory in which doxygen # is run, you must also specify the path to the tagfile here. TAGFILES = # When a file name is specified after GENERATE_TAGFILE, doxygen will create # a tag file that is based on the input files it reads. GENERATE_TAGFILE = # If the ALLEXTERNALS tag is set to YES all external classes will be listed # in the class index. If set to NO only the inherited external classes # will be listed. ALLEXTERNALS = NO # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed # in the modules index. If set to NO, only the current project's groups will # be listed. EXTERNAL_GROUPS = YES # The PERL_PATH should be the absolute path and name of the perl script # interpreter (i.e. the result of `which perl'). 
PERL_PATH = /usr/bin/perl #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will # generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base # or super classes. Setting the tag to NO turns the diagrams off. Note that # this option is superseded by the HAVE_DOT option below. This is only a # fallback. It is recommended to install and use dot, since it yields more # powerful graphs. CLASS_DIAGRAMS = YES # You can define message sequence charts within doxygen comments using the \msc # command. Doxygen will then run the mscgen tool (see # http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the # documentation. The MSCGEN_PATH tag allows you to specify the directory where # the mscgen tool resides. If left empty the tool is assumed to be found in the # default search path. MSCGEN_PATH = # If set to YES, the inheritance and collaboration graphs will hide # inheritance and usage relations if the target is undocumented # or is not a class. HIDE_UNDOC_RELATIONS = YES # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is # available from the path. This tool is part of Graphviz, a graph visualization # toolkit from AT&T and Lucent Bell Labs. The other options in this section # have no effect if this option is set to NO (the default) HAVE_DOT = NO # By default doxygen will write a font called FreeSans.ttf to the output # directory and reference it in all dot files that doxygen generates. This # font does not include all possible unicode characters however, so when you need # these (or just want a differently looking font) you can specify the font name # using DOT_FONTNAME. 
You need to make sure dot is able to find the font, # which can be done by putting it in a standard location or by setting the # DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory # containing the font. DOT_FONTNAME = FreeSans # The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. # The default size is 10pt. DOT_FONTSIZE = 10 # By default doxygen will tell dot to use the output directory to look for the # FreeSans.ttf font (which doxygen will put there itself). If you specify a # different font using DOT_FONTNAME you can set the path where dot # can find it using this tag. DOT_FONTPATH = # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect inheritance relations. Setting this tag to YES will force the # CLASS_DIAGRAMS tag to NO. CLASS_GRAPH = YES # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect implementation dependencies (inheritance, containment, and # class references variables) of the class with other documented classes. COLLABORATION_GRAPH = YES # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen # will generate a graph for groups, showing the direct groups dependencies GROUP_GRAPHS = YES # If the UML_LOOK tag is set to YES doxygen will generate inheritance and # collaboration diagrams in a style similar to the OMG's Unified Modeling # Language. UML_LOOK = NO # If set to YES, the inheritance and collaboration graphs will show the # relations between templates and their instances. TEMPLATE_RELATIONS = NO # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT # tags are set to YES then doxygen will generate a graph for each documented # file showing the direct and indirect include dependencies of the file with # other documented files.
INCLUDE_GRAPH = YES # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and # HAVE_DOT tags are set to YES then doxygen will generate a graph for each # documented header file showing the documented files that directly or # indirectly include this file. INCLUDED_BY_GRAPH = YES # If the CALL_GRAPH and HAVE_DOT options are set to YES then # doxygen will generate a call dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable call graphs # for selected functions only using the \callgraph command. CALL_GRAPH = NO # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then # doxygen will generate a caller dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable caller # graphs for selected functions only using the \callergraph command. CALLER_GRAPH = NO # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen # will show a graphical hierarchy of all classes instead of a textual one. GRAPHICAL_HIERARCHY = YES # If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES # then doxygen will show the dependencies a directory has on other directories # in a graphical way. The dependency relations are determined by the #include # relations between the files in the directories. DIRECTORY_GRAPH = YES # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. Possible values are png, jpg, or gif. # If left blank, png will be used. DOT_IMAGE_FORMAT = png # The tag DOT_PATH can be used to specify the path where the dot tool can be # found. If left blank, it is assumed the dot tool can be found in the path.
DOT_PATH = # The DOTFILE_DIRS tag can be used to specify one or more directories that # contain dot files that are included in the documentation (see the # \dotfile command). DOTFILE_DIRS = # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of # nodes that will be shown in the graph. If the number of nodes in a graph # becomes larger than this value, doxygen will truncate the graph, which is # visualized by representing a node as a red box. Note that if the # number of direct children of the root node in a graph is already larger than # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. DOT_GRAPH_MAX_NODES = 50 # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the # graphs generated by dot. A depth value of 3 means that only nodes reachable # from the root by following a path via at most 3 edges will be shown. Nodes # that lie further from the root node will be omitted. Note that setting this # option to 1 or 2 may greatly reduce the computation time needed for large # code bases. Also note that the size of a graph can be further restricted by # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. MAX_DOT_GRAPH_DEPTH = 0 # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent # background. This is disabled by default, because dot on Windows does not # seem to support this out of the box. Warning: Depending on the platform used, # enabling this option may lead to badly anti-aliased labels on the edges of # a graph (i.e. they become hard to read). DOT_TRANSPARENT = NO # Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This # makes dot run faster, but since only newer versions of dot (>1.8.10) # support this, this feature is disabled by default.
DOT_MULTI_TARGETS = NO # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will # generate a legend page explaining the meaning of the various boxes and # arrows in the dot generated graphs. GENERATE_LEGEND = YES # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will # remove the intermediate dot files that are used to generate # the various graphs. DOT_CLEANUP = YES #--------------------------------------------------------------------------- # Options related to the search engine #--------------------------------------------------------------------------- # The SEARCHENGINE tag specifies whether or not a search engine should be # used. If set to NO the values of all tags below this one will be ignored. SEARCHENGINE = NO ViennaCL-1.5.1-src/doc/CMakeLists.txt000644 001750 001750 00000006316 12267307531 017301 0ustar00rupprupp000000 000000
#
# Documentation targets:
#   apidoc-all / apidoc : Doxygen API documentation (enabled by BUILD_DOXYGEN_DOCS)
#   manual              : LaTeX user manual viennacl.pdf (enabled by BUILD_MANUAL)
#

if(BUILD_DOXYGEN_DOCS)
  # Instantiate the Doxygen configuration in the build tree; @ONLY restricts the
  # substitution to @VAR@ references so that ${...} text in the template survives.
  configure_file(Doxyfile.in "${CMAKE_CURRENT_BINARY_DIR}/Doxyfile" @ONLY)

  # Doxygen is started without arguments from the build directory, where the
  # configured 'Doxyfile' resides. The OUTPUT path presumably matches the output
  # directory set in Doxyfile.in - confirm there when changing either side.
  add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/doxygen/html/index.html"
                     COMMAND ${DOXYGEN_EXECUTABLE}
                     DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/Doxyfile"
                     WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
                     VERBATIM)

  # Runs only once
  add_custom_target(apidoc-all ALL
                    DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/doxygen/html/index.html")

  # For manual updates
  add_custom_target(apidoc
                    COMMAND ${DOXYGEN_EXECUTABLE}
                    DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/Doxyfile"
                    WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
                    VERBATIM)
endif()

if(BUILD_MANUAL)
  # All inputs of the manual (LaTeX sources, figures, bibliography style and database).
  set(MANUAL_SRCS
      manual/algorithms.tex
      manual/benchmarks.tex
      manual/changelogs.tex
      manual/contributors.tex
      manual/cover.tex
      manual/custom-contexts.tex
      manual/custom-kernels.tex
      manual/design.tex
      manual/figures/logo_px200.eps
      manual/figures/matvec2.eps
      manual/figures/note.eps
      manual/figures/solver.eps
      manual/figures/tip.eps
      manual/figures/TU_Signet_CMYK.eps
      manual/IEEEtran_v1.13.bst
      manual/additional-algorithms.tex
      manual/installation.tex
      manual/introduction.tex
      manual/keywords.tex
      manual/license.tex
      manual/multi-device.tex
      manual/operations.tex
      manual/other-libs.tex
      manual/shared-lib.tex
      manual/setup.tex
      manual/structured-matrices.tex
      manual/tuning.tex
      manual/types.tex
      manual/versioning.tex
      manual/viennacl.bib
      manual/viennacl.tex)

  # Files declared as outputs of the LaTeX build rule below: the final PDF plus
  # the intermediate files of the tool chain.
  set(MANUAL_OBJS
      "${CMAKE_CURRENT_BINARY_DIR}/viennacl.pdf"
      "${CMAKE_CURRENT_BINARY_DIR}/manual/viennacl.bbl"
      "${CMAKE_CURRENT_BINARY_DIR}/manual/viennacl.blg"
      "${CMAKE_CURRENT_BINARY_DIR}/manual/viennacl.dvi"
      "${CMAKE_CURRENT_BINARY_DIR}/manual/viennacl.idx"
      "${CMAKE_CURRENT_BINARY_DIR}/manual/viennacl.log"
      "${CMAKE_CURRENT_BINARY_DIR}/manual/viennacl.out"
      "${CMAKE_CURRENT_BINARY_DIR}/manual/viennacl.ps"
      "${CMAKE_CURRENT_BINARY_DIR}/manual/viennacl.toc")

  # Copy every input into the build tree so that LaTeX runs out-of-source, and
  # register one .aux file per .tex source as an additional output.
  foreach(f IN LISTS MANUAL_SRCS)
    configure_file(${f} "${CMAKE_CURRENT_BINARY_DIR}/${f}" COPYONLY)
    if(f MATCHES "(.*)\\.tex$")
      list(APPEND MANUAL_OBJS "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_MATCH_1}.aux")
    endif()
  endforeach()

  # LaTeX and BibTeX are run repeatedly to resolve citations and cross references;
  # the resulting DVI is converted to PostScript and then to PDF.
  # Fix: the Ghostscript switch was spelled '-dMAxSubsetPct'. Ghostscript parameter
  # names are case-sensitive, so the intended MaxSubsetPct setting was never applied.
  add_custom_command(OUTPUT ${MANUAL_OBJS}
                     COMMAND ${LATEX_COMPILER} viennacl.tex
                     COMMAND ${LATEX_COMPILER} viennacl.tex
                     COMMAND ${BIBTEX_COMPILER} viennacl
                     COMMAND ${LATEX_COMPILER} viennacl.tex
                     COMMAND ${BIBTEX_COMPILER} viennacl
                     COMMAND ${LATEX_COMPILER} viennacl.tex
                     COMMAND ${DVIPS_CONVERTER} -Ppdf -G0 -ta4 viennacl.dvi
                     COMMAND ${PS2PDF_CONVERTER} -dPDFSETTINGS=/prepress
                             -dCompatibilityLevel=1.3 -dMaxSubsetPct=100
                             -dSubsetFonts=true -dEmbedAllFonts=true
                             -sPAPERSIZE=a4 -dAutoRotatePages=/None
                             -dOptimize=true viennacl.ps ../viennacl.pdf
                     DEPENDS ${MANUAL_SRCS}
                     WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/manual"
                     COMMENT "Generating manual viennacl.pdf"
                     VERBATIM)

  add_custom_target(manual ALL
                    DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/viennacl.pdf")
endif()
ViennaCL-1.5.1-src/doc/manual/000755 001750 001750 00000000000 12267307531 016010 5ustar00rupprupp000000 000000 ViennaCL-1.5.1-src/doc/manual/algorithms.tex000644 001750 001750 00000046174 12235435247 020720 0ustar00rupprupp000000 000000 \chapter{Algorithms} \label{chap:algorithms} This chapter gives an overview over the
available algorithms in {\ViennaCL}. The focus of {\ViennaCL} is on iterative solvers, for which {\ViennaCL} provides a generic implementation that allows the use of the same code on the CPU (either using \ublas, Eigen, MTL4 or \OpenCL) and on the GPU (using \OpenCL). \section{Direct Solvers} \label{sec:direct-solvers} {\ViennaCLversion} provides triangular solvers and LU factorization without pivoting for the solution of dense linear systems. The interface is similar to that of {\ublas} \begin{lstlisting} using namespace viennacl::linalg; //to keep solver calls short viennacl::matrix vcl_matrix; viennacl::vector vcl_rhs; viennacl::vector vcl_result; /* Set up matrix and vectors here */ //solution of an upper triangular system: vcl_result = solve(vcl_matrix, vcl_rhs, upper_tag()); //solution of a lower triangular system: vcl_result = solve(vcl_matrix, vcl_rhs, lower_tag()); //solution of a full system right into the load vector vcl_rhs: lu_factorize(vcl_matrix); lu_substitute(vcl_matrix, vcl_rhs); \end{lstlisting} In {\ViennaCLminorversion} there is no pivoting included in the LU factorization process, hence the computation may break down or yield results with poor accuracy. However, for certain classes of matrices (like diagonal dominant matrices) good results can be obtained without pivoting. It is also possible to solve for multiple right hand sides: \begin{lstlisting} using namespace viennacl::linalg; //to keep solver calls short viennacl::matrix vcl_matrix; viennacl::matrix vcl_rhs_matrix; viennacl::matrix vcl_result; /* Set up matrices here */ //solution of an upper triangular system: vcl_result = solve(vcl_matrix, vcl_rhs_matrix, upper_tag()); //solution of a lower triangular system: vcl_result = solve(vcl_matrix, vcl_rhs_matrix, lower_tag()); \end{lstlisting} \section{Iterative Solvers} \label{sec:iterative-solvers} {\ViennaCL} provides different iterative solvers for various classes of matrices, listed in Tab.~\ref{tab:linear-solvers}. 
Unlike direct solvers, the convergence of iterative solvers relies on certain properties of the system matrix. Keep in mind that an iterative solver may fail to converge, especially if the matrix is ill conditioned or a wrong solver is chosen. \TIP{For full details on linear solver calls, refer to the reference documentation located in \texttt{doc/doxygen/} and to the tutorials} \TIP{The iterative solvers can directly be used for {\ublas}, Eigen and MTL4 objects! Please have a look at Chap.~\ref{chap:other-libs} and the respective tutorials in the examples/tutorials/ folder.} \NOTE{In {\ViennaCLversion}, GMRES using ATI GPUs yields wrong results due to a bug in Stream SDK v2.1. Consider using newer versions of the Stream SDK.} \begin{lstlisting} viennacl::compressed_matrix vcl_matrix; viennacl::vector vcl_rhs; viennacl::vector vcl_result; /* Set up matrix and vectors here */ //solution using conjugate gradient solver: vcl_result = viennacl::linalg::solve(vcl_matrix, vcl_rhs, viennacl::linalg::cg_tag()); //solution using BiCGStab solver: vcl_result = viennacl::linalg::solve(vcl_matrix, vcl_rhs, viennacl::linalg::bicgstab_tag()); //solution using GMRES solver: vcl_result = viennacl::linalg::solve(vcl_matrix, vcl_rhs, viennacl::linalg::gmres_tag()); \end{lstlisting} \begin{table}[tb] \begin{center} \renewcommand{\arraystretch}{1.2} \begin{tabular}{p{4cm}|p{3cm}|p{7.5cm}} Method & Matrix class & ViennaCL\\ \hline Conjugate Gradient (CG) & symmetric positive definite & \texttt{y = solve(A, x, cg\_tag());} \\ Stabilized Bi-CG (BiCGStab) & non-symmetric & \texttt{y = solve(A, x, bicgstab\_tag());} \\ Generalized Minimum Residual (GMRES) & general & \texttt{y = solve(A, x, gmres\_tag());} \\ \hline \end{tabular} \caption{Linear solver routines in {\ViennaCL} for the computation of $y$ in the expression $Ay = x$ with given $A$, $x$.} \label{tab:linear-solvers} \end{center} \end{table} Customized error tolerances can be set in the solver tags. 
The convention is that solver tags take the relative error tolerance as first argument and the maximum number of iteration steps as second argument. Furthermore, after the solver run, the number of iterations and the estimated error can be obtained from the solver tags as follows: \begin{lstlisting} // conjugate gradient solver with tolerance 1e-10 // and at most 100 iterations: viennacl::linalg::cg_tag custom_cg(1e-10, 100); vcl_result = viennacl::linalg::solve(vcl_matrix, vcl_rhs, custom_cg); //print number of iterations taken and estimated error: std::cout << "No. of iters: " << custom_cg.iters() << std::endl; std::cout << "Est. error: " << custom_cg.error() << std::endl; \end{lstlisting} The BiCGStab solver tag can be customized in exactly the same way. The GMRES solver tag takes as third argument the dimension of the Krylov space. Thus, a tag for GMRES(30) with tolerance $1\mathrm{E}\!-\!10$ and at most $100$ total iterations (hence, up to three restarts) can be set up by \begin{lstlisting} viennacl::linalg::gmres_tag custom_gmres(1e-10, 100, 30); \end{lstlisting} \section{Preconditioners} \label{sec:preconditioner} {\ViennaCL} ships with a generic implementation of several preconditioners. The preconditioner setup is, except for simple diagonal preconditioners, always carried out on the CPU host due to the need for dynamically allocating memory. Thus, one may not obtain an overall performance benefit if too much time is spent on the preconditioner setup.
\TIP{The preconditioner also works for {\ublas} types!} An overview of preconditioners available for the various sparse matrix types is as follows: \begin{center} \begin{tabular}{|l|c|c|c|c|c|c|} \hline Matrix Type & ICHOL & (Block-)ILU[0/T] & Jacobi & Row-scaling & AMG & SPAI \\ \hline \lstinline|compressed_matrix| & yes & yes & yes & yes & yes & yes \\ \lstinline|coordinate_matrix| & no & no & yes & yes & no & no \\ \lstinline|ell_matrix| & no & no & no & no & no & no \\ \lstinline|hyb_matrix| & no & no & no & no & no & no \\ \hline \end{tabular} \end{center} Broader support of preconditioners particularly for \lstinline|ell_matrix| and \lstinline|hyb_matrix| is scheduled for future releases. AMG and SPAI preconditioners are described in Chap.~\ref{chap:additional-algorithms}. In the following it is assumed that the sparse linear system of equations is given as follows: \begin{lstlisting} typedef viennacl::compressed_matrix SparseMatrix; SparseMatrix vcl_matrix; viennacl::vector vcl_rhs; viennacl::vector vcl_result; /* Set up matrix and vectors here */ \end{lstlisting} % \begin{table}[tb] % \begin{center} % \renewcommand{\arraystretch}{1.2} % \begin{tabular}{p{3cm}|p{4cm}|p{7cm}} % Method & Brief description & Parameters\\ % \hline % ILUT & incomplete LU factorization & First parameter: Maximum number of entries % per row. Second parameter: Drop tolerance. \\ % Jacobi & Divide each row in $A$ by its diagonal entry & none \\ % Row Scaling & Divide each row in $A$ by its norm & First parameter specifies % the norm (1: $l^1$-norm, 2: $l^2$-norm)\\ % \hline % \end{tabular} % \caption{Preconditioners for iterative solvers in {\ViennaCL}.} % \label{tab:preconditioners} % \end{center} % \end{table} \subsection{Incomplete LU Factorization with Threshold (ILUT)} The incomplete LU factorization preconditioner aims at computing sparse lower and upper triangular matrices $L$ and $U$ such that the sparse system matrix is approximately given by $A \approx LU$.
In order to control the sparsity pattern of $L$ and $U$, a threshold strategy is used (ILUT) \cite{saad-iterative-solution}. Due to the serial nature of the preconditioner, the setup of ILUT is always computed on the CPU using the respective ViennaCL backend. \begin{lstlisting} //compute ILUT preconditioner: viennacl::linalg::ilut_tag ilut_config; viennacl::linalg::ilut_precond< SparseMatrix > vcl_ilut(vcl_matrix, ilut_config); //solve (e.g. using BiCGStab solver) vcl_result = viennacl::linalg::solve(vcl_matrix, vcl_rhs, viennacl::linalg::bicgstab_tag(), vcl_ilut); //preconditioner here \end{lstlisting} The triangular substitutions may be applied in parallel on GPUs by enabling \emph{level-scheduling} \cite{saad-iterative-solution} via the member function call \lstinline|use_level_scheduling(true)| in the \lstinline|ilut_config| object. Three parameters can be passed to the constructor of \lstinline|ilut_tag|: The first specifies the maximum number of entries per row in $L$ and $U$, while the second parameter specifies the drop tolerance. The third parameter is the boolean specifying whether level scheduling should be used. \TIP{The performance of level scheduling depends strongly on the matrix pattern and is thus disabled by default.} \subsection{Incomplete LU Factorization with Static Pattern (ILU0)} Similar to ILUT, ILU0 computes an approximate LU factorization with sparse factors $L$ and $U$. While ILUT determines the location of nonzero entries on the fly, ILU0 uses the sparsity pattern of $A$ for the sparsity pattern of $L$ and $U$ \cite{saad-iterative-solution}. Due to the serial nature of the preconditioner, the setup of ILU0 is computed on the CPU. \begin{lstlisting} //compute ILU0 preconditioner: viennacl::linalg::ilu0_tag ilu0_config; viennacl::linalg::ilu0_precond< SparseMatrix > vcl_ilut(vcl_matrix, ilu0_config); //solve (e.g.
using BiCGStab solver) vcl_result = viennacl::linalg::solve(vcl_matrix, vcl_rhs, viennacl::linalg::bicgstab_tag(), vcl_ilut); //preconditioner here \end{lstlisting} The triangular substitutions may be applied in parallel on GPUs by enabling \emph{level-scheduling} \cite{saad-iterative-solution} via the member function call \lstinline|use_level_scheduling(true)| in the \lstinline|ilu0_config| object. One parameter can be passed to the constructor of \lstinline|ilu0_tag|, being the boolean specifying whether level scheduling should be used. \TIP{The performance of level scheduling depends strongly on the matrix pattern and is thus disabled by default.} \subsection{Block-ILU} To overcome the serial nature of ILUT and ILU0 applied to the full system matrix, a parallel variant is to apply ILU to diagonal blocks of the system matrix. This is accomplished by the \lstinline|block_ilu| preconditioner, which takes the system matrix type as first template argument and the respective ILU-tag type as second template argument (either \lstinline|ilut_tag| or \lstinline|ilu0_tag|). Support for accelerators using {\CUDA} or {\OpenCL} is provided. \begin{lstlisting} //compute block-ILU preconditioner using ILU0 for each block: block_ilu_precond< SparseMatrix, ilu0_tag > vcl_block_ilu0(vcl_matrix, ilu0_tag()); //solve vcl_result = viennacl::linalg::solve(vcl_matrix, vcl_rhs, viennacl::linalg::bicgstab_tag(), vcl_block_ilu0); \end{lstlisting} A third argument can be passed to the constructor of \lstinline|block_ilu_precond|: Either the number of blocks to be used (defaults to $8$), or an index vector with fine-grained control over the blocks. Refer to the Doxygen pages in doc/doxygen for details. \TIP{The number of blocks is a design parameter for your sparse linear system at hand.
A higher number of blocks leads to better memory bandwidth utilization on GPUs, but may increase the number of solver iterations.} \subsection{Jacobi Preconditioner} A Jacobi preconditioner is a simple diagonal preconditioner given by the reciprocals of the diagonal entries of the system matrix $A$. Use the preconditioner as follows: \begin{lstlisting} //compute Jacobi preconditioner: jacobi_precond< SparseMatrix > vcl_jacobi(vcl_matrix, viennacl::linalg::jacobi_tag()); //solve (e.g. using conjugate gradient solver) vcl_result = viennacl::linalg::solve(vcl_matrix, vcl_rhs, viennacl::linalg::cg_tag(), vcl_jacobi); \end{lstlisting} \subsection{Row Scaling Preconditioner} A row scaling preconditioner is a simple diagonal preconditioner given by the reciprocals of the norms of the rows of the system matrix $A$. Use the preconditioner as follows: \begin{lstlisting} //compute row scaling preconditioner: row_scaling< SparseMatrix > vcl_row_scaling(vcl_matrix, viennacl::linalg::row_scaling_tag()); //solve (e.g. using conjugate gradient solver) vcl_result = viennacl::linalg::solve(vcl_matrix, vcl_rhs, viennacl::linalg::cg_tag(), vcl_row_scaling); \end{lstlisting} The tag \lstinline|viennacl::linalg::row_scaling_tag()| can be supplied with a parameter denoting the norm to be used. A value of \lstinline|1| specifies the $l^1$-norm, while a value of $2$ selects the $l^2$-norm (default). \section{Eigenvalue Computations} %{\ViennaCL} Two algorithms for the computation of the eigenvalues of a matrix $A$ are implemented in {\ViennaCL}: \begin{itemize} \item The Power Iteration \cite{golub:matrix-computations} \item The Lanczos Algorithm \cite{simon:lanczos-pro} \end{itemize} Depending on the parameter \lstinline|tag| either one of them is called. Both algorithms can be used for either {\ublas} or {\ViennaCL} compressed matrices.\\ In order to get the eigenvalue with the greatest absolute value the power iteration should be called.
\\ The Lanczos algorithm returns a vector of the largest eigenvalues with the same type as the entries of the matrix. The algorithms are called for a matrix object \lstinline|A| by \begin{lstlisting} std::vector<double> largest_eigenvalues = viennacl::linalg::eig(A, ltag); double largest_eigenvalue = viennacl::linalg::eig(A, ptag); \end{lstlisting} \subsection{Power Iteration} The Power iteration aims at computing the eigenvalues of a matrix by calculating the product of the matrix and a vector several times, where the resulting vector is used for the next product of the matrix and so on. The computation stops as soon as the norm of the vector converges. \\ The final vector is the eigenvector to the eigenvalue with the greatest absolute value.\\ To call this algorithm, \lstinline|piter_tag| must be used. This tag has only one parameter: \\ \lstinline|terminationfactor| defines the accuracy of the computation, i.e. if the new norm of the eigenvector changes less than this parameter the computation stops and returns the corresponding eigenvalue (default: $10^{-10}$).\\ The call of the constructor may look like the following: \begin{lstlisting} viennacl::linalg::piter_tag ptag(1e-8); \end{lstlisting} \TIP{Example code can be found in \lstinline|examples/tutorial/power-iter.cpp|.} \subsection{The Lanczos Algorithm} In order to compute the eigenvalues of a sparse high-dimensional matrix the Lanczos algorithm can be used to find these. This algorithm reformulates the given high-dimensional matrix in a way such that the matrix can be rewritten as a tridiagonal matrix of much lower dimension. The eigenvalues of this tridiagonal matrix are equal to the largest eigenvalues of the original matrix. \\ The eigenvalues of the tridiagonal matrix are calculated by using the bisection method \cite{golub:matrix-computations}. \\ To call this Lanczos algorithm, \lstinline|lanczos_tag| must be used.
This tag has several parameters that can be passed to the constructor: \begin{itemize} \item The exponent of epsilon for the tolerance of the reorthogonalization, defined by the parameter \lstinline|factor| (default: $0.75$) \item The method of the Lanczos algorithm: $0$ uses partial reorthogonalization, $1$ full reorthogonalization and $2$ does not use reorthogonalization (default: $0$) \item The number of eigenvalues that are returned is specified by \lstinline|num_eigenvalues| (default: $10$) \item The size of the Krylov space used for the computations can be set by the parameter \lstinline|krylov_size| (default: $100$). The maximum number of iterations can be equal to or less than this parameter \end{itemize} The call of the constructor may look like the following: \begin{lstlisting} viennacl::linalg::lanczos_tag ltag(0.85, 15, 0, 200); \end{lstlisting} \TIP{Example code can be found in \lstinline|examples/tutorial/lanczos.cpp|.} \section{QR Factorization} \NOTE{The current QR factorization implementation depends on {\ublas}.} A matrix $A \in \mathbb{R}^{n\times m}$ can be factored into $A = Q R$, where $Q \in \mathbb{R}^{n\times n}$ is an orthogonal matrix and $R \in \mathbb{R}^{n \times m}$ is upper triangular. This so-called QR-factorization is important for eigenvalue computations as well as for the solution of least-squares problems \cite{golub:matrix-computations}. {\ViennaCL} provides a generic implementation of the QR-factorization using Householder reflections in file \lstinline|viennacl/linalg/qr.hpp|. An example application can be found in \lstinline|examples/tutorial/qr.cpp|. The Householder reflectors $v_i$ defining the Householder reflection $I - \beta_i v_i v_i^{\mathrm{T}}$ are stored in the columns below the diagonal of the input matrix $A$ \cite{golub:matrix-computations}. The normalization coefficients $\beta_i$ are returned by the worker function \lstinline|inplace_qr|.
The upper triangular matrix $R$ is directly written to the upper triangular part of $A$. \begin{lstlisting} std::vector betas = viennacl::linalg::inplace_qr(A, 12); \end{lstlisting} If $A$ is a dense matrix from \ublas, the calculation is carried out on the CPU using a single thread. If $A$ is a \lstinline|viennacl::matrix|, a hybrid implementation is used: The panel factorization is carried out using \ublas, while expensive BLAS level 3 operations are computed on the OpenCL device using multiple threads. Typically, the orthogonal matrix $Q$ is kept in implicit form for reasons of computational efficiency. However, if $Q$ and $R$ have to be computed explicitly, the function \lstinline|recoverQ| can be used: \begin{lstlisting} viennacl::linalg::recoverQ(A, betas, Q, R); \end{lstlisting} Here, \lstinline|A| is the inplace QR-factored matrix, \lstinline|betas| are the coefficients of the Householder reflectors as returned by \lstinline|inplace_qr|, while \lstinline|Q| and \lstinline|R| are the destination matrices. However, the explicit formation of $Q$ is expensive and is usually avoided. For a number of applications of the QR factorization it is required to apply $Q^T$ to a vector $b$. This is accomplished by \begin{lstlisting} viennacl::linalg::inplace_qr_apply_trans_Q(A, betas, b); \end{lstlisting} without setting up $Q$ (or $Q^T$) explicitly.
\TIP{Have a look at \lstinline|examples/tutorial/least-squares.cpp| for a least-squares computation using QR factorizations.} ViennaCL-1.5.1-src/doc/manual/figures/000755 001750 001750 00000000000 12267307531 017454 5ustar00rupprupp000000 000000 ViennaCL-1.5.1-src/doc/manual/figures/TU_Signet_CMYK.eps000644 001750 001750 00000666255 12267307531 022675 0ustar00rupprupp000000 000000 %!PS-Adobe-3.1 EPSF-3.0 %ADO_DSC_Encoding: MacOS Roman %%Title: TU_Signet_CMYK.eps %%Creator: Adobe Illustrator(R) 14.0 %%For: iBook %%CreationDate: 08.09.09 %%BoundingBox: 0 0 227 227 %%HiResBoundingBox: 0 0 226.7720 226.7715 %%CropBox: 0 0 226.7720 226.7715 %%LanguageLevel: 2 %%DocumentData: Clean7Bit %ADOBeginClientInjection: DocumentHeader "AI11EPS" %%AI8_CreatorVersion: 14.0.0 %AI9_PrintingDataBegin %ADO_BuildNumber: Adobe Illustrator(R) 14.0.0 x367 R agm 4.4890 ct 5.1541 %ADO_ContainsXMP: MainFirst %ADOEndClientInjection: DocumentHeader "AI11EPS" %%Pages: 1 %%DocumentNeededResources: %%DocumentSuppliedResources: procset Adobe_AGM_Image 1.0 0 %%+ procset Adobe_CoolType_Utility_T42 1.0 0 %%+ procset Adobe_CoolType_Utility_MAKEOCF 1.23 0 %%+ procset Adobe_CoolType_Core 2.31 0 %%+ procset Adobe_AGM_Core 2.0 0 %%+ procset Adobe_AGM_Utils 1.0 0 %%DocumentFonts: %%DocumentNeededFonts: %%DocumentNeededFeatures: %%DocumentSuppliedFeatures: %%DocumentProcessColors: %%DocumentCustomColors: (100c 38m 0y 15k) %%CMYKCustomColor: 1 0.3800 0 0.1500 (100c 38m 0y 15k) %%RGBCustomColor: %%EndComments %%BeginDefaults %%ViewingOrientation: 1 0 0 1 %%EndDefaults %%BeginProlog %%BeginResource: procset Adobe_AGM_Utils 1.0 0 %%Version: 1.0 0 %%Copyright: Copyright(C)2000-2006 Adobe Systems, Inc. All Rights Reserved. 
systemdict/setpacking known {currentpacking true setpacking}if userdict/Adobe_AGM_Utils 75 dict dup begin put /bdf {bind def}bind def /nd{null def}bdf /xdf {exch def}bdf /ldf {load def}bdf /ddf {put}bdf /xddf {3 -1 roll put}bdf /xpt {exch put}bdf /ndf { exch dup where{ pop pop pop }{ xdf }ifelse }def /cdndf { exch dup currentdict exch known{ pop pop }{ exch def }ifelse }def /gx {get exec}bdf /ps_level /languagelevel where{ pop systemdict/languagelevel gx }{ 1 }ifelse def /level2 ps_level 2 ge def /level3 ps_level 3 ge def /ps_version {version cvr}stopped{-1}if def /set_gvm {currentglobal exch setglobal}bdf /reset_gvm {setglobal}bdf /makereadonlyarray { /packedarray where{pop packedarray }{ array astore readonly}ifelse }bdf /map_reserved_ink_name { dup type/stringtype eq{ dup/Red eq{ pop(_Red_) }{ dup/Green eq{ pop(_Green_) }{ dup/Blue eq{ pop(_Blue_) }{ dup()cvn eq{ pop(Process) }if }ifelse }ifelse }ifelse }if }bdf /AGMUTIL_GSTATE 22 dict def /get_gstate { AGMUTIL_GSTATE begin /AGMUTIL_GSTATE_clr_spc currentcolorspace def /AGMUTIL_GSTATE_clr_indx 0 def /AGMUTIL_GSTATE_clr_comps 12 array def mark currentcolor counttomark {AGMUTIL_GSTATE_clr_comps AGMUTIL_GSTATE_clr_indx 3 -1 roll put /AGMUTIL_GSTATE_clr_indx AGMUTIL_GSTATE_clr_indx 1 add def}repeat pop /AGMUTIL_GSTATE_fnt rootfont def /AGMUTIL_GSTATE_lw currentlinewidth def /AGMUTIL_GSTATE_lc currentlinecap def /AGMUTIL_GSTATE_lj currentlinejoin def /AGMUTIL_GSTATE_ml currentmiterlimit def currentdash/AGMUTIL_GSTATE_do xdf/AGMUTIL_GSTATE_da xdf /AGMUTIL_GSTATE_sa currentstrokeadjust def /AGMUTIL_GSTATE_clr_rnd currentcolorrendering def /AGMUTIL_GSTATE_op currentoverprint def /AGMUTIL_GSTATE_bg currentblackgeneration cvlit def /AGMUTIL_GSTATE_ucr currentundercolorremoval cvlit def currentcolortransfer cvlit/AGMUTIL_GSTATE_gy_xfer xdf cvlit/AGMUTIL_GSTATE_b_xfer xdf cvlit/AGMUTIL_GSTATE_g_xfer xdf cvlit/AGMUTIL_GSTATE_r_xfer xdf /AGMUTIL_GSTATE_ht currenthalftone def /AGMUTIL_GSTATE_flt currentflat def end }def 
/set_gstate { AGMUTIL_GSTATE begin AGMUTIL_GSTATE_clr_spc setcolorspace AGMUTIL_GSTATE_clr_indx{AGMUTIL_GSTATE_clr_comps AGMUTIL_GSTATE_clr_indx 1 sub get /AGMUTIL_GSTATE_clr_indx AGMUTIL_GSTATE_clr_indx 1 sub def}repeat setcolor AGMUTIL_GSTATE_fnt setfont AGMUTIL_GSTATE_lw setlinewidth AGMUTIL_GSTATE_lc setlinecap AGMUTIL_GSTATE_lj setlinejoin AGMUTIL_GSTATE_ml setmiterlimit AGMUTIL_GSTATE_da AGMUTIL_GSTATE_do setdash AGMUTIL_GSTATE_sa setstrokeadjust AGMUTIL_GSTATE_clr_rnd setcolorrendering AGMUTIL_GSTATE_op setoverprint AGMUTIL_GSTATE_bg cvx setblackgeneration AGMUTIL_GSTATE_ucr cvx setundercolorremoval AGMUTIL_GSTATE_r_xfer cvx AGMUTIL_GSTATE_g_xfer cvx AGMUTIL_GSTATE_b_xfer cvx AGMUTIL_GSTATE_gy_xfer cvx setcolortransfer AGMUTIL_GSTATE_ht/HalftoneType get dup 9 eq exch 100 eq or { currenthalftone/HalftoneType get AGMUTIL_GSTATE_ht/HalftoneType get ne { mark AGMUTIL_GSTATE_ht{sethalftone}stopped cleartomark }if }{ AGMUTIL_GSTATE_ht sethalftone }ifelse AGMUTIL_GSTATE_flt setflat end }def /get_gstate_and_matrix { AGMUTIL_GSTATE begin /AGMUTIL_GSTATE_ctm matrix currentmatrix def end get_gstate }def /set_gstate_and_matrix { set_gstate AGMUTIL_GSTATE begin AGMUTIL_GSTATE_ctm setmatrix end }def /AGMUTIL_str256 256 string def /AGMUTIL_src256 256 string def /AGMUTIL_dst64 64 string def /AGMUTIL_srcLen nd /AGMUTIL_ndx nd /AGMUTIL_cpd nd /capture_cpd{ //Adobe_AGM_Utils/AGMUTIL_cpd currentpagedevice ddf }def /thold_halftone { level3 {sethalftone currenthalftone} { dup/HalftoneType get 3 eq { sethalftone currenthalftone }{ begin Width Height mul{ Thresholds read{pop}if }repeat end currenthalftone }ifelse }ifelse }def /rdcmntline { currentfile AGMUTIL_str256 readline pop (%)anchorsearch{pop}if }bdf /filter_cmyk { dup type/filetype ne{ exch()/SubFileDecode filter }{ exch pop } ifelse [ exch { AGMUTIL_src256 readstring pop dup length/AGMUTIL_srcLen exch def /AGMUTIL_ndx 0 def AGMCORE_plate_ndx 4 AGMUTIL_srcLen 1 sub{ 1 index exch get AGMUTIL_dst64 AGMUTIL_ndx 3 -1 roll put 
/AGMUTIL_ndx AGMUTIL_ndx 1 add def }for pop AGMUTIL_dst64 0 AGMUTIL_ndx getinterval } bind /exec cvx ]cvx }bdf /filter_indexed_devn { cvi Names length mul names_index add Lookup exch get }bdf /filter_devn { 4 dict begin /srcStr xdf /dstStr xdf dup type/filetype ne{ 0()/SubFileDecode filter }if [ exch [ /devicen_colorspace_dict/AGMCORE_gget cvx/begin cvx currentdict/srcStr get/readstring cvx/pop cvx /dup cvx/length cvx 0/gt cvx[ Adobe_AGM_Utils/AGMUTIL_ndx 0/ddf cvx names_index Names length currentdict/srcStr get length 1 sub{ 1/index cvx/exch cvx/get cvx currentdict/dstStr get/AGMUTIL_ndx/load cvx 3 -1/roll cvx/put cvx Adobe_AGM_Utils/AGMUTIL_ndx/AGMUTIL_ndx/load cvx 1/add cvx/ddf cvx }for currentdict/dstStr get 0/AGMUTIL_ndx/load cvx/getinterval cvx ]cvx/if cvx /end cvx ]cvx bind /exec cvx ]cvx end }bdf /AGMUTIL_imagefile nd /read_image_file { AGMUTIL_imagefile 0 setfileposition 10 dict begin /imageDict xdf /imbufLen Width BitsPerComponent mul 7 add 8 idiv def /imbufIdx 0 def /origDataSource imageDict/DataSource get def /origMultipleDataSources imageDict/MultipleDataSources get def /origDecode imageDict/Decode get def /dstDataStr imageDict/Width get colorSpaceElemCnt mul string def imageDict/MultipleDataSources known{MultipleDataSources}{false}ifelse { /imbufCnt imageDict/DataSource get length def /imbufs imbufCnt array def 0 1 imbufCnt 1 sub{ /imbufIdx xdf imbufs imbufIdx imbufLen string put imageDict/DataSource get imbufIdx[AGMUTIL_imagefile imbufs imbufIdx get/readstring cvx/pop cvx]cvx put }for DeviceN_PS2{ imageDict begin /DataSource[DataSource/devn_sep_datasource cvx]cvx def /MultipleDataSources false def /Decode[0 1]def end }if }{ /imbuf imbufLen string def Indexed_DeviceN level3 not and DeviceN_NoneName or{ /srcDataStrs[imageDict begin currentdict/MultipleDataSources known{MultipleDataSources{DataSource length}{1}ifelse}{1}ifelse { Width Decode length 2 div mul cvi string }repeat end]def imageDict begin /DataSource[AGMUTIL_imagefile Decode BitsPerComponent 
false 1/filter_indexed_devn load dstDataStr srcDataStrs devn_alt_datasource/exec cvx]cvx def /Decode[0 1]def end }{ imageDict/DataSource[1 string dup 0 AGMUTIL_imagefile Decode length 2 idiv string/readstring cvx/pop cvx names_index/get cvx/put cvx]cvx put imageDict/Decode[0 1]put }ifelse }ifelse imageDict exch load exec imageDict/DataSource origDataSource put imageDict/MultipleDataSources origMultipleDataSources put imageDict/Decode origDecode put end }bdf /write_image_file { begin {(AGMUTIL_imagefile)(w+)file}stopped{ false }{ Adobe_AGM_Utils/AGMUTIL_imagefile xddf 2 dict begin /imbufLen Width BitsPerComponent mul 7 add 8 idiv def MultipleDataSources{DataSource 0 get}{DataSource}ifelse type/filetype eq{ /imbuf imbufLen string def }if 1 1 Height MultipleDataSources not{Decode length 2 idiv mul}if{ pop MultipleDataSources{ 0 1 DataSource length 1 sub{ DataSource type dup /arraytype eq{ pop DataSource exch gx }{ /filetype eq{ DataSource exch get imbuf readstring pop }{ DataSource exch get }ifelse }ifelse AGMUTIL_imagefile exch writestring }for }{ DataSource type dup /arraytype eq{ pop DataSource exec }{ /filetype eq{ DataSource imbuf readstring pop }{ DataSource }ifelse }ifelse AGMUTIL_imagefile exch writestring }ifelse }for end true }ifelse end }bdf /close_image_file { AGMUTIL_imagefile closefile(AGMUTIL_imagefile)deletefile }def statusdict/product known userdict/AGMP_current_show known not and{ /pstr statusdict/product get def pstr(HP LaserJet 2200)eq pstr(HP LaserJet 4000 Series)eq or pstr(HP LaserJet 4050 Series )eq or pstr(HP LaserJet 8000 Series)eq or pstr(HP LaserJet 8100 Series)eq or pstr(HP LaserJet 8150 Series)eq or pstr(HP LaserJet 5000 Series)eq or pstr(HP LaserJet 5100 Series)eq or pstr(HP Color LaserJet 4500)eq or pstr(HP Color LaserJet 4600)eq or pstr(HP LaserJet 5Si)eq or pstr(HP LaserJet 1200 Series)eq or pstr(HP LaserJet 1300 Series)eq or pstr(HP LaserJet 4100 Series)eq or { userdict/AGMP_current_show/show load put userdict/show{ currentcolorspace 
0 get /Pattern eq {false charpath f} {AGMP_current_show}ifelse }put }if currentdict/pstr undef }if /consumeimagedata { begin AGMIMG_init_common currentdict/MultipleDataSources known not {/MultipleDataSources false def}if MultipleDataSources { DataSource 0 get type dup/filetype eq { 1 dict begin /flushbuffer Width cvi string def 1 1 Height cvi { pop 0 1 DataSource length 1 sub { DataSource exch get flushbuffer readstring pop pop }for }for end }if dup/arraytype eq exch/packedarraytype eq or DataSource 0 get xcheck and { Width Height mul cvi { 0 1 DataSource length 1 sub {dup DataSource exch gx length exch 0 ne{pop}if}for dup 0 eq {pop exit}if sub dup 0 le {exit}if }loop pop }if } { /DataSource load type dup/filetype eq { 1 dict begin /flushbuffer Width Decode length 2 idiv mul cvi string def 1 1 Height{pop DataSource flushbuffer readstring pop pop}for end }if dup/arraytype eq exch/packedarraytype eq or/DataSource load xcheck and { Height Width BitsPerComponent mul 8 BitsPerComponent sub add 8 idiv Decode length 2 idiv mul mul { DataSource length dup 0 eq {pop exit}if sub dup 0 le {exit}if }loop pop }if }ifelse end }bdf /addprocs { 2{/exec load}repeat 3 1 roll [5 1 roll]bind cvx }def /modify_halftone_xfer { currenthalftone dup length dict copy begin currentdict 2 index known{ 1 index load dup length dict copy begin currentdict/TransferFunction known{ /TransferFunction load }{ currenttransfer }ifelse addprocs/TransferFunction xdf currentdict end def currentdict end sethalftone }{ currentdict/TransferFunction known{ /TransferFunction load }{ currenttransfer }ifelse addprocs/TransferFunction xdf currentdict end sethalftone pop }ifelse }def /clonearray { dup xcheck exch dup length array exch Adobe_AGM_Core/AGMCORE_tmp -1 ddf { Adobe_AGM_Core/AGMCORE_tmp 2 copy get 1 add ddf dup type/dicttype eq { Adobe_AGM_Core/AGMCORE_tmp get exch clonedict Adobe_AGM_Core/AGMCORE_tmp 4 -1 roll ddf }if dup type/arraytype eq { Adobe_AGM_Core/AGMCORE_tmp get exch clonearray 
Adobe_AGM_Core/AGMCORE_tmp 4 -1 roll ddf }if exch dup Adobe_AGM_Core/AGMCORE_tmp get 4 -1 roll put }forall exch{cvx}if }bdf /clonedict { dup length dict begin { dup type/dicttype eq {clonedict}if dup type/arraytype eq {clonearray}if def }forall currentdict end }bdf /DeviceN_PS2 { /currentcolorspace AGMCORE_gget 0 get/DeviceN eq level3 not and }bdf /Indexed_DeviceN { /indexed_colorspace_dict AGMCORE_gget dup null ne{ dup/CSDBase known{ /CSDBase get/CSD get_res/Names known }{ pop false }ifelse }{ pop false }ifelse }bdf /DeviceN_NoneName { /Names where{ pop false Names { (None)eq or }forall }{ false }ifelse }bdf /DeviceN_PS2_inRip_seps { /AGMCORE_in_rip_sep where { pop dup type dup/arraytype eq exch/packedarraytype eq or { dup 0 get/DeviceN eq level3 not and AGMCORE_in_rip_sep and { /currentcolorspace exch AGMCORE_gput false }{ true }ifelse }{ true }ifelse }{ true }ifelse }bdf /base_colorspace_type { dup type/arraytype eq{0 get}if }bdf /currentdistillerparams where{pop currentdistillerparams/CoreDistVersion get 5000 lt}{true}ifelse { /pdfmark_5{cleartomark}bind def }{ /pdfmark_5{pdfmark}bind def }ifelse /ReadBypdfmark_5 { currentfile exch 0 exch/SubFileDecode filter /currentdistillerparams where {pop currentdistillerparams/CoreDistVersion get 5000 lt}{true}ifelse {flushfile cleartomark} {/PUT pdfmark}ifelse }bdf /ReadBypdfmark_5_string { 2 dict begin /makerString exch def string/tmpString exch def { currentfile tmpString readline not{pop exit}if makerString anchorsearch { pop pop cleartomark exit }{ 3 copy/PUT pdfmark_5 pop 2 copy(\n)/PUT pdfmark_5 }ifelse }loop end }bdf /xpdfm { { dup 0 get/Label eq { aload length[exch 1 add 1 roll/PAGELABEL }{ aload pop [{ThisPage}<<5 -2 roll>>/PUT }ifelse pdfmark_5 }forall }bdf /lmt{ dup 2 index le{exch}if pop dup 2 index ge{exch}if pop }bdf /int{ dup 2 index sub 3 index 5 index sub div 6 -2 roll sub mul exch pop add exch pop }bdf /ds{ Adobe_AGM_Utils begin }bdf /dt{ currentdict Adobe_AGM_Utils eq{ end }if }bdf 
systemdict/setpacking known {setpacking}if %%EndResource %%BeginResource: procset Adobe_AGM_Core 2.0 0 %%Version: 2.0 0 %%Copyright: Copyright(C)1997-2007 Adobe Systems, Inc. All Rights Reserved. systemdict/setpacking known { currentpacking true setpacking }if userdict/Adobe_AGM_Core 209 dict dup begin put /Adobe_AGM_Core_Id/Adobe_AGM_Core_2.0_0 def /AGMCORE_str256 256 string def /AGMCORE_save nd /AGMCORE_graphicsave nd /AGMCORE_c 0 def /AGMCORE_m 0 def /AGMCORE_y 0 def /AGMCORE_k 0 def /AGMCORE_cmykbuf 4 array def /AGMCORE_screen[currentscreen]cvx def /AGMCORE_tmp 0 def /AGMCORE_&setgray nd /AGMCORE_&setcolor nd /AGMCORE_&setcolorspace nd /AGMCORE_&setcmykcolor nd /AGMCORE_cyan_plate nd /AGMCORE_magenta_plate nd /AGMCORE_yellow_plate nd /AGMCORE_black_plate nd /AGMCORE_plate_ndx nd /AGMCORE_get_ink_data nd /AGMCORE_is_cmyk_sep nd /AGMCORE_host_sep nd /AGMCORE_avoid_L2_sep_space nd /AGMCORE_distilling nd /AGMCORE_composite_job nd /AGMCORE_producing_seps nd /AGMCORE_ps_level -1 def /AGMCORE_ps_version -1 def /AGMCORE_environ_ok nd /AGMCORE_CSD_cache 0 dict def /AGMCORE_currentoverprint false def /AGMCORE_deltaX nd /AGMCORE_deltaY nd /AGMCORE_name nd /AGMCORE_sep_special nd /AGMCORE_err_strings 4 dict def /AGMCORE_cur_err nd /AGMCORE_current_spot_alias false def /AGMCORE_inverting false def /AGMCORE_feature_dictCount nd /AGMCORE_feature_opCount nd /AGMCORE_feature_ctm nd /AGMCORE_ConvertToProcess false def /AGMCORE_Default_CTM matrix def /AGMCORE_Default_PageSize nd /AGMCORE_Default_flatness nd /AGMCORE_currentbg nd /AGMCORE_currentucr nd /AGMCORE_pattern_paint_type 0 def /knockout_unitsq nd currentglobal true setglobal [/CSA/Gradient/Procedure] { /Generic/Category findresource dup length dict copy/Category defineresource pop }forall setglobal /AGMCORE_key_known { where{ /Adobe_AGM_Core_Id known }{ false }ifelse }ndf /flushinput { save 2 dict begin /CompareBuffer 3 -1 roll def /readbuffer 256 string def mark { currentfile readbuffer{readline}stopped {cleartomark 
mark} { not {pop exit} if CompareBuffer eq {exit} if }ifelse }loop cleartomark end restore }bdf /getspotfunction { AGMCORE_screen exch pop exch pop dup type/dicttype eq{ dup/HalftoneType get 1 eq{ /SpotFunction get }{ dup/HalftoneType get 2 eq{ /GraySpotFunction get }{ pop { abs exch abs 2 copy add 1 gt{ 1 sub dup mul exch 1 sub dup mul add 1 sub }{ dup mul exch dup mul add 1 exch sub }ifelse }bind }ifelse }ifelse }if }def /np {newpath}bdf /clp_npth {clip np}def /eoclp_npth {eoclip np}def /npth_clp {np clip}def /graphic_setup { /AGMCORE_graphicsave save store concat 0 setgray 0 setlinecap 0 setlinejoin 1 setlinewidth []0 setdash 10 setmiterlimit np false setoverprint false setstrokeadjust //Adobe_AGM_Core/spot_alias gx /Adobe_AGM_Image where{ pop Adobe_AGM_Image/spot_alias 2 copy known{ gx }{ pop pop }ifelse }if /sep_colorspace_dict null AGMCORE_gput 100 dict begin /dictstackcount countdictstack def /showpage{}def mark }def /graphic_cleanup { cleartomark dictstackcount 1 countdictstack 1 sub{end}for end AGMCORE_graphicsave restore }def /compose_error_msg { grestoreall initgraphics /Helvetica findfont 10 scalefont setfont /AGMCORE_deltaY 100 def /AGMCORE_deltaX 310 def clippath pathbbox np pop pop 36 add exch 36 add exch moveto 0 AGMCORE_deltaY rlineto AGMCORE_deltaX 0 rlineto 0 AGMCORE_deltaY neg rlineto AGMCORE_deltaX neg 0 rlineto closepath 0 AGMCORE_&setgray gsave 1 AGMCORE_&setgray fill grestore 1 setlinewidth gsave stroke grestore currentpoint AGMCORE_deltaY 15 sub add exch 8 add exch moveto /AGMCORE_deltaY 12 def /AGMCORE_tmp 0 def AGMCORE_err_strings exch get { dup 32 eq { pop AGMCORE_str256 0 AGMCORE_tmp getinterval stringwidth pop currentpoint pop add AGMCORE_deltaX 28 add gt { currentpoint AGMCORE_deltaY sub exch pop clippath pathbbox pop pop pop 44 add exch moveto }if AGMCORE_str256 0 AGMCORE_tmp getinterval show( )show 0 1 AGMCORE_str256 length 1 sub { AGMCORE_str256 exch 0 put }for /AGMCORE_tmp 0 def }{ AGMCORE_str256 exch AGMCORE_tmp xpt /AGMCORE_tmp 
AGMCORE_tmp 1 add def }ifelse }forall }bdf /AGMCORE_CMYKDeviceNColorspaces[ [/Separation/None/DeviceCMYK{0 0 0}] [/Separation(Black)/DeviceCMYK{0 0 0 4 -1 roll}bind] [/Separation(Yellow)/DeviceCMYK{0 0 3 -1 roll 0}bind] [/DeviceN[(Yellow)(Black)]/DeviceCMYK{0 0 4 2 roll}bind] [/Separation(Magenta)/DeviceCMYK{0 exch 0 0}bind] [/DeviceN[(Magenta)(Black)]/DeviceCMYK{0 3 1 roll 0 exch}bind] [/DeviceN[(Magenta)(Yellow)]/DeviceCMYK{0 3 1 roll 0}bind] [/DeviceN[(Magenta)(Yellow)(Black)]/DeviceCMYK{0 4 1 roll}bind] [/Separation(Cyan)/DeviceCMYK{0 0 0}] [/DeviceN[(Cyan)(Black)]/DeviceCMYK{0 0 3 -1 roll}bind] [/DeviceN[(Cyan)(Yellow)]/DeviceCMYK{0 exch 0}bind] [/DeviceN[(Cyan)(Yellow)(Black)]/DeviceCMYK{0 3 1 roll}bind] [/DeviceN[(Cyan)(Magenta)]/DeviceCMYK{0 0}] [/DeviceN[(Cyan)(Magenta)(Black)]/DeviceCMYK{0 exch}bind] [/DeviceN[(Cyan)(Magenta)(Yellow)]/DeviceCMYK{0}] [/DeviceCMYK] ]def /ds{ Adobe_AGM_Core begin /currentdistillerparams where { pop currentdistillerparams/CoreDistVersion get 5000 lt {<>setdistillerparams}if }if /AGMCORE_ps_version xdf /AGMCORE_ps_level xdf errordict/AGM_handleerror known not{ errordict/AGM_handleerror errordict/handleerror get put errordict/handleerror{ Adobe_AGM_Core begin $error/newerror get AGMCORE_cur_err null ne and{ $error/newerror false put AGMCORE_cur_err compose_error_msg }if $error/newerror true put end errordict/AGM_handleerror get exec }bind put }if /AGMCORE_environ_ok ps_level AGMCORE_ps_level ge ps_version AGMCORE_ps_version ge and AGMCORE_ps_level -1 eq or def AGMCORE_environ_ok not {/AGMCORE_cur_err/AGMCORE_bad_environ def}if /AGMCORE_&setgray systemdict/setgray get def level2{ /AGMCORE_&setcolor systemdict/setcolor get def /AGMCORE_&setcolorspace systemdict/setcolorspace get def }if /AGMCORE_currentbg currentblackgeneration def /AGMCORE_currentucr currentundercolorremoval def /AGMCORE_Default_flatness currentflat def /AGMCORE_distilling /product where{ pop systemdict/setdistillerparams known product(Adobe PostScript Parser)ne 
and }{ false }ifelse def /AGMCORE_GSTATE AGMCORE_key_known not{ /AGMCORE_GSTATE 21 dict def /AGMCORE_tmpmatrix matrix def /AGMCORE_gstack 32 array def /AGMCORE_gstackptr 0 def /AGMCORE_gstacksaveptr 0 def /AGMCORE_gstackframekeys 14 def /AGMCORE_&gsave/gsave ldf /AGMCORE_&grestore/grestore ldf /AGMCORE_&grestoreall/grestoreall ldf /AGMCORE_&save/save ldf /AGMCORE_&setoverprint/setoverprint ldf /AGMCORE_gdictcopy{ begin {def}forall end }def /AGMCORE_gput{ AGMCORE_gstack AGMCORE_gstackptr get 3 1 roll put }def /AGMCORE_gget{ AGMCORE_gstack AGMCORE_gstackptr get exch get }def /gsave{ AGMCORE_&gsave AGMCORE_gstack AGMCORE_gstackptr get AGMCORE_gstackptr 1 add dup 32 ge{limitcheck}if /AGMCORE_gstackptr exch store AGMCORE_gstack AGMCORE_gstackptr get AGMCORE_gdictcopy }def /grestore{ AGMCORE_&grestore AGMCORE_gstackptr 1 sub dup AGMCORE_gstacksaveptr lt{1 add}if dup AGMCORE_gstack exch get dup/AGMCORE_currentoverprint known {/AGMCORE_currentoverprint get setoverprint}{pop}ifelse /AGMCORE_gstackptr exch store }def /grestoreall{ AGMCORE_&grestoreall /AGMCORE_gstackptr AGMCORE_gstacksaveptr store }def /save{ AGMCORE_&save AGMCORE_gstack AGMCORE_gstackptr get AGMCORE_gstackptr 1 add dup 32 ge{limitcheck}if /AGMCORE_gstackptr exch store /AGMCORE_gstacksaveptr AGMCORE_gstackptr store AGMCORE_gstack AGMCORE_gstackptr get AGMCORE_gdictcopy }def /setoverprint{ dup/AGMCORE_currentoverprint exch AGMCORE_gput AGMCORE_&setoverprint }def 0 1 AGMCORE_gstack length 1 sub{ AGMCORE_gstack exch AGMCORE_gstackframekeys dict put }for }if level3/AGMCORE_&sysshfill AGMCORE_key_known not and { /AGMCORE_&sysshfill systemdict/shfill get def /AGMCORE_&sysmakepattern systemdict/makepattern get def /AGMCORE_&usrmakepattern/makepattern load def }if /currentcmykcolor[0 0 0 0]AGMCORE_gput /currentstrokeadjust false AGMCORE_gput /currentcolorspace[/DeviceGray]AGMCORE_gput /sep_tint 0 AGMCORE_gput /devicen_tints[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]AGMCORE_gput 
/sep_colorspace_dict null AGMCORE_gput /devicen_colorspace_dict null AGMCORE_gput /indexed_colorspace_dict null AGMCORE_gput /currentcolor_intent()AGMCORE_gput /customcolor_tint 1 AGMCORE_gput /absolute_colorimetric_crd null AGMCORE_gput /relative_colorimetric_crd null AGMCORE_gput /saturation_crd null AGMCORE_gput /perceptual_crd null AGMCORE_gput currentcolortransfer cvlit/AGMCore_gray_xfer xdf cvlit/AGMCore_b_xfer xdf cvlit/AGMCore_g_xfer xdf cvlit/AGMCore_r_xfer xdf << /MaxPatternItem currentsystemparams/MaxPatternCache get >> setuserparams end }def /ps { /setcmykcolor where{ pop Adobe_AGM_Core/AGMCORE_&setcmykcolor/setcmykcolor load put }if Adobe_AGM_Core begin /setcmykcolor { 4 copy AGMCORE_cmykbuf astore/currentcmykcolor exch AGMCORE_gput 1 sub 4 1 roll 3{ 3 index add neg dup 0 lt{ pop 0 }if 3 1 roll }repeat setrgbcolor pop }ndf /currentcmykcolor { /currentcmykcolor AGMCORE_gget aload pop }ndf /setoverprint {pop}ndf /currentoverprint {false}ndf /AGMCORE_cyan_plate 1 0 0 0 test_cmyk_color_plate def /AGMCORE_magenta_plate 0 1 0 0 test_cmyk_color_plate def /AGMCORE_yellow_plate 0 0 1 0 test_cmyk_color_plate def /AGMCORE_black_plate 0 0 0 1 test_cmyk_color_plate def /AGMCORE_plate_ndx AGMCORE_cyan_plate{ 0 }{ AGMCORE_magenta_plate{ 1 }{ AGMCORE_yellow_plate{ 2 }{ AGMCORE_black_plate{ 3 }{ 4 }ifelse }ifelse }ifelse }ifelse def /AGMCORE_have_reported_unsupported_color_space false def /AGMCORE_report_unsupported_color_space { AGMCORE_have_reported_unsupported_color_space false eq { (Warning: Job contains content that cannot be separated with on-host methods. 
This content appears on the black plate, and knocks out all other plates.)== Adobe_AGM_Core/AGMCORE_have_reported_unsupported_color_space true ddf }if }def /AGMCORE_composite_job AGMCORE_cyan_plate AGMCORE_magenta_plate and AGMCORE_yellow_plate and AGMCORE_black_plate and def /AGMCORE_in_rip_sep /AGMCORE_in_rip_sep where{ pop AGMCORE_in_rip_sep }{ AGMCORE_distilling { false }{ userdict/Adobe_AGM_OnHost_Seps known{ false }{ level2{ currentpagedevice/Separations 2 copy known{ get }{ pop pop false }ifelse }{ false }ifelse }ifelse }ifelse }ifelse def /AGMCORE_producing_seps AGMCORE_composite_job not AGMCORE_in_rip_sep or def /AGMCORE_host_sep AGMCORE_producing_seps AGMCORE_in_rip_sep not and def /AGM_preserve_spots /AGM_preserve_spots where{ pop AGM_preserve_spots }{ AGMCORE_distilling AGMCORE_producing_seps or }ifelse def /AGM_is_distiller_preserving_spotimages { currentdistillerparams/PreserveOverprintSettings known { currentdistillerparams/PreserveOverprintSettings get { currentdistillerparams/ColorConversionStrategy known { currentdistillerparams/ColorConversionStrategy get /sRGB ne }{ true }ifelse }{ false }ifelse }{ false }ifelse }def /convert_spot_to_process where{pop}{ /convert_spot_to_process { //Adobe_AGM_Core begin dup map_alias{ /Name get exch pop }if dup dup(None)eq exch(All)eq or { pop false }{ AGMCORE_host_sep { gsave 1 0 0 0 setcmykcolor currentgray 1 exch sub 0 1 0 0 setcmykcolor currentgray 1 exch sub 0 0 1 0 setcmykcolor currentgray 1 exch sub 0 0 0 1 setcmykcolor currentgray 1 exch sub add add add 0 eq { pop false }{ false setoverprint current_spot_alias false set_spot_alias 1 1 1 1 6 -1 roll findcmykcustomcolor 1 setcustomcolor set_spot_alias currentgray 1 ne }ifelse grestore }{ AGMCORE_distilling { pop AGM_is_distiller_preserving_spotimages not }{ //Adobe_AGM_Core/AGMCORE_name xddf false //Adobe_AGM_Core/AGMCORE_pattern_paint_type get 0 eq AGMUTIL_cpd/OverrideSeparations known and { AGMUTIL_cpd/OverrideSeparations get { /HqnSpots/ProcSet 
resourcestatus { pop pop pop true }if }if }if { AGMCORE_name/HqnSpots/ProcSet findresource/TestSpot gx not }{ gsave [/Separation AGMCORE_name/DeviceGray{}]AGMCORE_&setcolorspace false AGMUTIL_cpd/SeparationColorNames 2 copy known { get {AGMCORE_name eq or}forall not }{ pop pop pop true }ifelse grestore }ifelse }ifelse }ifelse }ifelse end }def }ifelse /convert_to_process where{pop}{ /convert_to_process { dup length 0 eq { pop false }{ AGMCORE_host_sep { dup true exch { dup(Cyan)eq exch dup(Magenta)eq 3 -1 roll or exch dup(Yellow)eq 3 -1 roll or exch dup(Black)eq 3 -1 roll or {pop} {convert_spot_to_process and}ifelse } forall { true exch { dup(Cyan)eq exch dup(Magenta)eq 3 -1 roll or exch dup(Yellow)eq 3 -1 roll or exch (Black)eq or and }forall not }{pop false}ifelse }{ false exch { /PhotoshopDuotoneList where{pop false}{true}ifelse { dup(Cyan)eq exch dup(Magenta)eq 3 -1 roll or exch dup(Yellow)eq 3 -1 roll or exch dup(Black)eq 3 -1 roll or {pop} {convert_spot_to_process or}ifelse } { convert_spot_to_process or } ifelse } forall }ifelse }ifelse }def }ifelse /AGMCORE_avoid_L2_sep_space version cvr 2012 lt level2 and AGMCORE_producing_seps not and def /AGMCORE_is_cmyk_sep AGMCORE_cyan_plate AGMCORE_magenta_plate or AGMCORE_yellow_plate or AGMCORE_black_plate or def /AGM_avoid_0_cmyk where{ pop AGM_avoid_0_cmyk }{ AGM_preserve_spots userdict/Adobe_AGM_OnHost_Seps known userdict/Adobe_AGM_InRip_Seps known or not and }ifelse { /setcmykcolor[ { 4 copy add add add 0 eq currentoverprint and{ pop 0.0005 }if }/exec cvx /AGMCORE_&setcmykcolor load dup type/operatortype ne{ /exec cvx }if ]cvx def }if /AGMCORE_IsSeparationAProcessColor { dup(Cyan)eq exch dup(Magenta)eq exch dup(Yellow)eq exch(Black)eq or or or }def AGMCORE_host_sep{ /setcolortransfer { AGMCORE_cyan_plate{ pop pop pop }{ AGMCORE_magenta_plate{ 4 3 roll pop pop pop }{ AGMCORE_yellow_plate{ 4 2 roll pop pop pop }{ 4 1 roll pop pop pop }ifelse }ifelse }ifelse settransfer } def /AGMCORE_get_ink_data 
AGMCORE_cyan_plate{ {pop pop pop} }{ AGMCORE_magenta_plate{ {4 3 roll pop pop pop} }{ AGMCORE_yellow_plate{ {4 2 roll pop pop pop} }{ {4 1 roll pop pop pop} }ifelse }ifelse }ifelse def /AGMCORE_RemoveProcessColorNames { 1 dict begin /filtername { dup/Cyan eq 1 index(Cyan)eq or {pop(_cyan_)}if dup/Magenta eq 1 index(Magenta)eq or {pop(_magenta_)}if dup/Yellow eq 1 index(Yellow)eq or {pop(_yellow_)}if dup/Black eq 1 index(Black)eq or {pop(_black_)}if }def dup type/arraytype eq {[exch{filtername}forall]} {filtername}ifelse end }def level3{ /AGMCORE_IsCurrentColor { dup AGMCORE_IsSeparationAProcessColor { AGMCORE_plate_ndx 0 eq {dup(Cyan)eq exch/Cyan eq or}if AGMCORE_plate_ndx 1 eq {dup(Magenta)eq exch/Magenta eq or}if AGMCORE_plate_ndx 2 eq {dup(Yellow)eq exch/Yellow eq or}if AGMCORE_plate_ndx 3 eq {dup(Black)eq exch/Black eq or}if AGMCORE_plate_ndx 4 eq {pop false}if }{ gsave false setoverprint current_spot_alias false set_spot_alias 1 1 1 1 6 -1 roll findcmykcustomcolor 1 setcustomcolor set_spot_alias currentgray 1 ne grestore }ifelse }def /AGMCORE_filter_functiondatasource { 5 dict begin /data_in xdf data_in type/stringtype eq { /ncomp xdf /comp xdf /string_out data_in length ncomp idiv string def 0 ncomp data_in length 1 sub { string_out exch dup ncomp idiv exch data_in exch ncomp getinterval comp get 255 exch sub put }for string_out }{ string/string_in xdf /string_out 1 string def /component xdf [ data_in string_in/readstring cvx [component/get cvx 255/exch cvx/sub cvx string_out/exch cvx 0/exch cvx/put cvx string_out]cvx [/pop cvx()]cvx/ifelse cvx ]cvx/ReusableStreamDecode filter }ifelse end }def /AGMCORE_separateShadingFunction { 2 dict begin /paint? xdf /channel xdf dup type/dicttype eq { begin FunctionType 0 eq { /DataSource channel Range length 2 idiv DataSource AGMCORE_filter_functiondatasource def currentdict/Decode known {/Decode Decode channel 2 mul 2 getinterval def}if paint? not {/Decode[1 1]def}if }if FunctionType 2 eq { paint? 
{ /C0[C0 channel get 1 exch sub]def /C1[C1 channel get 1 exch sub]def }{ /C0[1]def /C1[1]def }ifelse }if FunctionType 3 eq { /Functions[Functions{channel paint? AGMCORE_separateShadingFunction}forall]def }if currentdict/Range known {/Range[0 1]def}if currentdict end}{ channel get 0 paint? AGMCORE_separateShadingFunction }ifelse end }def /AGMCORE_separateShading { 3 -1 roll begin currentdict/Function known { currentdict/Background known {[1 index{Background 3 index get 1 exch sub}{1}ifelse]/Background xdf}if Function 3 1 roll AGMCORE_separateShadingFunction/Function xdf /ColorSpace[/DeviceGray]def }{ ColorSpace dup type/arraytype eq{0 get}if/DeviceCMYK eq { /ColorSpace[/DeviceN[/_cyan_/_magenta_/_yellow_/_black_]/DeviceCMYK{}]def }{ ColorSpace dup 1 get AGMCORE_RemoveProcessColorNames 1 exch put }ifelse ColorSpace 0 get/Separation eq { { [1/exch cvx/sub cvx]cvx }{ [/pop cvx 1]cvx }ifelse ColorSpace 3 3 -1 roll put pop }{ { [exch ColorSpace 1 get length 1 sub exch sub/index cvx 1/exch cvx/sub cvx ColorSpace 1 get length 1 add 1/roll cvx ColorSpace 1 get length{/pop cvx}repeat]cvx }{ pop[ColorSpace 1 get length{/pop cvx}repeat cvx 1]cvx }ifelse ColorSpace 3 3 -1 roll bind put }ifelse ColorSpace 2/DeviceGray put }ifelse end }def /AGMCORE_separateShadingDict { dup/ColorSpace get dup type/arraytype ne {[exch]}if dup 0 get/DeviceCMYK eq { exch begin currentdict AGMCORE_cyan_plate {0 true}if AGMCORE_magenta_plate {1 true}if AGMCORE_yellow_plate {2 true}if AGMCORE_black_plate {3 true}if AGMCORE_plate_ndx 4 eq {0 false}if dup not currentoverprint and {/AGMCORE_ignoreshade true def}if AGMCORE_separateShading currentdict end exch }if dup 0 get/Separation eq { exch begin ColorSpace 1 get dup/None ne exch/All ne and { ColorSpace 1 get AGMCORE_IsCurrentColor AGMCORE_plate_ndx 4 lt and ColorSpace 1 get AGMCORE_IsSeparationAProcessColor not and { ColorSpace 2 get dup type/arraytype eq{0 get}if/DeviceCMYK eq { /ColorSpace [ /Separation ColorSpace 1 get /DeviceGray [ ColorSpace 3 
get/exec cvx 4 AGMCORE_plate_ndx sub -1/roll cvx 4 1/roll cvx 3[/pop cvx]cvx/repeat cvx 1/exch cvx/sub cvx ]cvx ]def }{ AGMCORE_report_unsupported_color_space AGMCORE_black_plate not { currentdict 0 false AGMCORE_separateShading }if }ifelse }{ currentdict ColorSpace 1 get AGMCORE_IsCurrentColor 0 exch dup not currentoverprint and {/AGMCORE_ignoreshade true def}if AGMCORE_separateShading }ifelse }if currentdict end exch }if dup 0 get/DeviceN eq { exch begin ColorSpace 1 get convert_to_process { ColorSpace 2 get dup type/arraytype eq{0 get}if/DeviceCMYK eq { /ColorSpace [ /DeviceN ColorSpace 1 get /DeviceGray [ ColorSpace 3 get/exec cvx 4 AGMCORE_plate_ndx sub -1/roll cvx 4 1/roll cvx 3[/pop cvx]cvx/repeat cvx 1/exch cvx/sub cvx ]cvx ]def }{ AGMCORE_report_unsupported_color_space AGMCORE_black_plate not { currentdict 0 false AGMCORE_separateShading /ColorSpace[/DeviceGray]def }if }ifelse }{ currentdict false -1 ColorSpace 1 get { AGMCORE_IsCurrentColor { 1 add exch pop true exch exit }if 1 add }forall exch dup not currentoverprint and {/AGMCORE_ignoreshade true def}if AGMCORE_separateShading }ifelse currentdict end exch }if dup 0 get dup/DeviceCMYK eq exch dup/Separation eq exch/DeviceN eq or or not { exch begin ColorSpace dup type/arraytype eq {0 get}if /DeviceGray ne { AGMCORE_report_unsupported_color_space AGMCORE_black_plate not { ColorSpace 0 get/CIEBasedA eq { /ColorSpace[/Separation/_ciebaseda_/DeviceGray{}]def }if ColorSpace 0 get dup/CIEBasedABC eq exch dup/CIEBasedDEF eq exch/DeviceRGB eq or or { /ColorSpace[/DeviceN[/_red_/_green_/_blue_]/DeviceRGB{}]def }if ColorSpace 0 get/CIEBasedDEFG eq { /ColorSpace[/DeviceN[/_cyan_/_magenta_/_yellow_/_black_]/DeviceCMYK{}]def }if currentdict 0 false AGMCORE_separateShading }if }if currentdict end exch }if pop dup/AGMCORE_ignoreshade known { begin /ColorSpace[/Separation(None)/DeviceGray{}]def currentdict end }if }def /shfill { AGMCORE_separateShadingDict dup/AGMCORE_ignoreshade known {pop} {AGMCORE_&sysshfill}ifelse 
}def /makepattern { exch dup/PatternType get 2 eq { clonedict begin /Shading Shading AGMCORE_separateShadingDict def Shading/AGMCORE_ignoreshade known currentdict end exch {pop<>}if exch AGMCORE_&sysmakepattern }{ exch AGMCORE_&usrmakepattern }ifelse }def }if }if AGMCORE_in_rip_sep{ /setcustomcolor { exch aload pop dup 7 1 roll inRip_spot_has_ink not { 4{4 index mul 4 1 roll} repeat /DeviceCMYK setcolorspace 6 -2 roll pop pop }{ //Adobe_AGM_Core begin /AGMCORE_k xdf/AGMCORE_y xdf/AGMCORE_m xdf/AGMCORE_c xdf end [/Separation 4 -1 roll/DeviceCMYK {dup AGMCORE_c mul exch dup AGMCORE_m mul exch dup AGMCORE_y mul exch AGMCORE_k mul} ] setcolorspace }ifelse setcolor }ndf /setseparationgray { [/Separation(All)/DeviceGray{}]setcolorspace_opt 1 exch sub setcolor }ndf }{ /setseparationgray { AGMCORE_&setgray }ndf }ifelse /findcmykcustomcolor { 5 makereadonlyarray }ndf /setcustomcolor { exch aload pop pop 4{4 index mul 4 1 roll}repeat setcmykcolor pop }ndf /has_color /colorimage where{ AGMCORE_producing_seps{ pop true }{ systemdict eq }ifelse }{ false }ifelse def /map_index { 1 index mul exch getinterval{255 div}forall }bdf /map_indexed_devn { Lookup Names length 3 -1 roll cvi map_index }bdf /n_color_components { base_colorspace_type dup/DeviceGray eq{ pop 1 }{ /DeviceCMYK eq{ 4 }{ 3 }ifelse }ifelse }bdf level2{ /mo/moveto ldf /li/lineto ldf /cv/curveto ldf /knockout_unitsq { 1 setgray 0 0 1 1 rectfill }def level2/setcolorspace AGMCORE_key_known not and{ /AGMCORE_&&&setcolorspace/setcolorspace ldf /AGMCORE_ReplaceMappedColor { dup type dup/arraytype eq exch/packedarraytype eq or { /AGMCORE_SpotAliasAry2 where{ begin dup 0 get dup/Separation eq { pop dup length array copy dup dup 1 get current_spot_alias { dup map_alias { false set_spot_alias dup 1 exch setsepcolorspace true set_spot_alias begin /sep_colorspace_dict currentdict AGMCORE_gput pop pop pop [ /Separation Name CSA map_csa MappedCSA /sep_colorspace_proc load ] dup Name end }if }if map_reserved_ink_name 1 xpt }{ 
/DeviceN eq { dup length array copy dup dup 1 get[ exch{ current_spot_alias{ dup map_alias{ /Name get exch pop }if }if map_reserved_ink_name }forall ]1 xpt }if }ifelse end }if }if }def /setcolorspace { dup type dup/arraytype eq exch/packedarraytype eq or { dup 0 get/Indexed eq { AGMCORE_distilling { /PhotoshopDuotoneList where { pop false }{ true }ifelse }{ true }ifelse { aload pop 3 -1 roll AGMCORE_ReplaceMappedColor 3 1 roll 4 array astore }if }{ AGMCORE_ReplaceMappedColor }ifelse }if DeviceN_PS2_inRip_seps{AGMCORE_&&&setcolorspace}if }def }if }{ /adj { currentstrokeadjust{ transform 0.25 sub round 0.25 add exch 0.25 sub round 0.25 add exch itransform }if }def /mo{ adj moveto }def /li{ adj lineto }def /cv{ 6 2 roll adj 6 2 roll adj 6 2 roll adj curveto }def /knockout_unitsq { 1 setgray 8 8 1[8 0 0 8 0 0]{}image }def /currentstrokeadjust{ /currentstrokeadjust AGMCORE_gget }def /setstrokeadjust{ /currentstrokeadjust exch AGMCORE_gput }def /setcolorspace { /currentcolorspace exch AGMCORE_gput }def /currentcolorspace { /currentcolorspace AGMCORE_gget }def /setcolor_devicecolor { base_colorspace_type dup/DeviceGray eq{ pop setgray }{ /DeviceCMYK eq{ setcmykcolor }{ setrgbcolor }ifelse }ifelse }def /setcolor { currentcolorspace 0 get dup/DeviceGray ne{ dup/DeviceCMYK ne{ dup/DeviceRGB ne{ dup/Separation eq{ pop currentcolorspace 3 gx currentcolorspace 2 get }{ dup/Indexed eq{ pop currentcolorspace 3 get dup type/stringtype eq{ currentcolorspace 1 get n_color_components 3 -1 roll map_index }{ exec }ifelse currentcolorspace 1 get }{ /AGMCORE_cur_err/AGMCORE_invalid_color_space def AGMCORE_invalid_color_space }ifelse }ifelse }if }if }if setcolor_devicecolor }def }ifelse /sop/setoverprint ldf /lw/setlinewidth ldf /lc/setlinecap ldf /lj/setlinejoin ldf /ml/setmiterlimit ldf /dsh/setdash ldf /sadj/setstrokeadjust ldf /gry/setgray ldf /rgb/setrgbcolor ldf /cmyk[ /currentcolorspace[/DeviceCMYK]/AGMCORE_gput cvx /setcmykcolor load dup type/operatortype ne{/exec cvx}if ]cvx bdf 
level3 AGMCORE_host_sep not and{ /nzopmsc{ 6 dict begin /kk exch def /yy exch def /mm exch def /cc exch def /sum 0 def cc 0 ne{/sum sum 2#1000 or def cc}if mm 0 ne{/sum sum 2#0100 or def mm}if yy 0 ne{/sum sum 2#0010 or def yy}if kk 0 ne{/sum sum 2#0001 or def kk}if AGMCORE_CMYKDeviceNColorspaces sum get setcolorspace sum 0 eq{0}if end setcolor }bdf }{ /nzopmsc/cmyk ldf }ifelse /sep/setsepcolor ldf /devn/setdevicencolor ldf /idx/setindexedcolor ldf /colr/setcolor ldf /csacrd/set_csa_crd ldf /sepcs/setsepcolorspace ldf /devncs/setdevicencolorspace ldf /idxcs/setindexedcolorspace ldf /cp/closepath ldf /clp/clp_npth ldf /eclp/eoclp_npth ldf /f/fill ldf /ef/eofill ldf /@/stroke ldf /nclp/npth_clp ldf /gset/graphic_setup ldf /gcln/graphic_cleanup ldf /ct/concat ldf /cf/currentfile ldf /fl/filter ldf /rs/readstring ldf /AGMCORE_def_ht currenthalftone def /clonedict Adobe_AGM_Utils begin/clonedict load end def /clonearray Adobe_AGM_Utils begin/clonearray load end def currentdict{ dup xcheck 1 index type dup/arraytype eq exch/packedarraytype eq or and{ bind }if def }forall /getrampcolor { /indx exch def 0 1 NumComp 1 sub { dup Samples exch get dup type/stringtype eq{indx get}if exch Scaling exch get aload pop 3 1 roll mul add }for ColorSpaceFamily/Separation eq {sep} { ColorSpaceFamily/DeviceN eq {devn}{setcolor}ifelse }ifelse }bdf /sssetbackground{ aload pop ColorSpaceFamily/Separation eq {sep} { ColorSpaceFamily/DeviceN eq {devn}{setcolor}ifelse }ifelse }bdf /RadialShade { 40 dict begin /ColorSpaceFamily xdf /background xdf /ext1 xdf /ext0 xdf /BBox xdf /r2 xdf /c2y xdf /c2x xdf /r1 xdf /c1y xdf /c1x xdf /rampdict xdf /setinkoverprint where{pop/setinkoverprint{pop}def}if gsave BBox length 0 gt { np BBox 0 get BBox 1 get moveto BBox 2 get BBox 0 get sub 0 rlineto 0 BBox 3 get BBox 1 get sub rlineto BBox 2 get BBox 0 get sub neg 0 rlineto closepath clip np }if c1x c2x eq { c1y c2y lt{/theta 90 def}{/theta 270 def}ifelse }{ /slope c2y c1y sub c2x c1x sub div def /theta 
slope 1 atan def c2x c1x lt c2y c1y ge and{/theta theta 180 sub def}if c2x c1x lt c2y c1y lt and{/theta theta 180 add def}if }ifelse gsave clippath c1x c1y translate theta rotate -90 rotate {pathbbox}stopped {0 0 0 0}if /yMax xdf /xMax xdf /yMin xdf /xMin xdf grestore xMax xMin eq yMax yMin eq or { grestore end }{ /max{2 copy gt{pop}{exch pop}ifelse}bdf /min{2 copy lt{pop}{exch pop}ifelse}bdf rampdict begin 40 dict begin background length 0 gt{background sssetbackground gsave clippath fill grestore}if gsave c1x c1y translate theta rotate -90 rotate /c2y c1x c2x sub dup mul c1y c2y sub dup mul add sqrt def /c1y 0 def /c1x 0 def /c2x 0 def ext0 { 0 getrampcolor c2y r2 add r1 sub 0.0001 lt { c1x c1y r1 360 0 arcn pathbbox /aymax exch def /axmax exch def /aymin exch def /axmin exch def /bxMin xMin axmin min def /byMin yMin aymin min def /bxMax xMax axmax max def /byMax yMax aymax max def bxMin byMin moveto bxMax byMin lineto bxMax byMax lineto bxMin byMax lineto bxMin byMin lineto eofill }{ c2y r1 add r2 le { c1x c1y r1 0 360 arc fill } { c2x c2y r2 0 360 arc fill r1 r2 eq { /p1x r1 neg def /p1y c1y def /p2x r1 def /p2y c1y def p1x p1y moveto p2x p2y lineto p2x yMin lineto p1x yMin lineto fill }{ /AA r2 r1 sub c2y div def AA -1 eq {/theta 89.99 def} {/theta AA 1 AA dup mul sub sqrt div 1 atan def} ifelse /SS1 90 theta add dup sin exch cos div def /p1x r1 SS1 SS1 mul SS1 SS1 mul 1 add div sqrt mul neg def /p1y p1x SS1 div neg def /SS2 90 theta sub dup sin exch cos div def /p2x r1 SS2 SS2 mul SS2 SS2 mul 1 add div sqrt mul def /p2y p2x SS2 div neg def r1 r2 gt { /L1maxX p1x yMin p1y sub SS1 div add def /L2maxX p2x yMin p2y sub SS2 div add def }{ /L1maxX 0 def /L2maxX 0 def }ifelse p1x p1y moveto p2x p2y lineto L2maxX L2maxX p2x sub SS2 mul p2y add lineto L1maxX L1maxX p1x sub SS1 mul p1y add lineto fill }ifelse }ifelse }ifelse }if c1x c2x sub dup mul c1y c2y sub dup mul add 0.5 exp 0 dtransform dup mul exch dup mul add 0.5 exp 72 div 0 72 matrix defaultmatrix dtransform 
dup mul exch dup mul add sqrt 72 0 matrix defaultmatrix dtransform dup mul exch dup mul add sqrt 1 index 1 index lt{exch}if pop /hires xdf hires mul /numpix xdf /numsteps NumSamples def /rampIndxInc 1 def /subsampling false def numpix 0 ne { NumSamples numpix div 0.5 gt { /numsteps numpix 2 div round cvi dup 1 le{pop 2}if def /rampIndxInc NumSamples 1 sub numsteps div def /subsampling true def }if }if /xInc c2x c1x sub numsteps div def /yInc c2y c1y sub numsteps div def /rInc r2 r1 sub numsteps div def /cx c1x def /cy c1y def /radius r1 def np xInc 0 eq yInc 0 eq rInc 0 eq and and { 0 getrampcolor cx cy radius 0 360 arc stroke NumSamples 1 sub getrampcolor cx cy radius 72 hires div add 0 360 arc 0 setlinewidth stroke }{ 0 numsteps { dup subsampling{round cvi}if getrampcolor cx cy radius 0 360 arc /cx cx xInc add def /cy cy yInc add def /radius radius rInc add def cx cy radius 360 0 arcn eofill rampIndxInc add }repeat pop }ifelse ext1 { c2y r2 add r1 lt { c2x c2y r2 0 360 arc fill }{ c2y r1 add r2 sub 0.0001 le { c2x c2y r2 360 0 arcn pathbbox /aymax exch def /axmax exch def /aymin exch def /axmin exch def /bxMin xMin axmin min def /byMin yMin aymin min def /bxMax xMax axmax max def /byMax yMax aymax max def bxMin byMin moveto bxMax byMin lineto bxMax byMax lineto bxMin byMax lineto bxMin byMin lineto eofill }{ c2x c2y r2 0 360 arc fill r1 r2 eq { /p1x r2 neg def /p1y c2y def /p2x r2 def /p2y c2y def p1x p1y moveto p2x p2y lineto p2x yMax lineto p1x yMax lineto fill }{ /AA r2 r1 sub c2y div def AA -1 eq {/theta 89.99 def} {/theta AA 1 AA dup mul sub sqrt div 1 atan def} ifelse /SS1 90 theta add dup sin exch cos div def /p1x r2 SS1 SS1 mul SS1 SS1 mul 1 add div sqrt mul neg def /p1y c2y p1x SS1 div sub def /SS2 90 theta sub dup sin exch cos div def /p2x r2 SS2 SS2 mul SS2 SS2 mul 1 add div sqrt mul def /p2y c2y p2x SS2 div sub def r1 r2 lt { /L1maxX p1x yMax p1y sub SS1 div add def /L2maxX p2x yMax p2y sub SS2 div add def }{ /L1maxX 0 def /L2maxX 0 def }ifelse p1x 
p1y moveto p2x p2y lineto L2maxX L2maxX p2x sub SS2 mul p2y add lineto L1maxX L1maxX p1x sub SS1 mul p1y add lineto fill }ifelse }ifelse }ifelse }if grestore grestore end end end }ifelse }bdf /GenStrips { 40 dict begin /ColorSpaceFamily xdf /background xdf /ext1 xdf /ext0 xdf /BBox xdf /y2 xdf /x2 xdf /y1 xdf /x1 xdf /rampdict xdf /setinkoverprint where{pop/setinkoverprint{pop}def}if gsave BBox length 0 gt { np BBox 0 get BBox 1 get moveto BBox 2 get BBox 0 get sub 0 rlineto 0 BBox 3 get BBox 1 get sub rlineto BBox 2 get BBox 0 get sub neg 0 rlineto closepath clip np }if x1 x2 eq { y1 y2 lt{/theta 90 def}{/theta 270 def}ifelse }{ /slope y2 y1 sub x2 x1 sub div def /theta slope 1 atan def x2 x1 lt y2 y1 ge and{/theta theta 180 sub def}if x2 x1 lt y2 y1 lt and{/theta theta 180 add def}if } ifelse gsave clippath x1 y1 translate theta rotate {pathbbox}stopped {0 0 0 0}if /yMax exch def /xMax exch def /yMin exch def /xMin exch def grestore xMax xMin eq yMax yMin eq or { grestore end }{ rampdict begin 20 dict begin background length 0 gt{background sssetbackground gsave clippath fill grestore}if gsave x1 y1 translate theta rotate /xStart 0 def /xEnd x2 x1 sub dup mul y2 y1 sub dup mul add 0.5 exp def /ySpan yMax yMin sub def /numsteps NumSamples def /rampIndxInc 1 def /subsampling false def xStart 0 transform xEnd 0 transform 3 -1 roll sub dup mul 3 1 roll sub dup mul add 0.5 exp 72 div 0 72 matrix defaultmatrix dtransform dup mul exch dup mul add sqrt 72 0 matrix defaultmatrix dtransform dup mul exch dup mul add sqrt 1 index 1 index lt{exch}if pop mul /numpix xdf numpix 0 ne { NumSamples numpix div 0.5 gt { /numsteps numpix 2 div round cvi dup 1 le{pop 2}if def /rampIndxInc NumSamples 1 sub numsteps div def /subsampling true def }if }if ext0 { 0 getrampcolor xMin xStart lt { xMin yMin xMin neg ySpan rectfill }if }if /xInc xEnd xStart sub numsteps div def /x xStart def 0 numsteps { dup subsampling{round cvi}if getrampcolor x yMin xInc ySpan rectfill /x x xInc add def 
rampIndxInc add }repeat pop ext1{ xMax xEnd gt { xEnd yMin xMax xEnd sub ySpan rectfill }if }if grestore grestore end end end }ifelse }bdf }def /pt { end }def /dt{ }def /pgsv{ //Adobe_AGM_Core/AGMCORE_save save put }def /pgrs{ //Adobe_AGM_Core/AGMCORE_save get restore }def systemdict/findcolorrendering known{ /findcolorrendering systemdict/findcolorrendering get def }if systemdict/setcolorrendering known{ /setcolorrendering systemdict/setcolorrendering get def }if /test_cmyk_color_plate { gsave setcmykcolor currentgray 1 ne grestore }def /inRip_spot_has_ink { dup//Adobe_AGM_Core/AGMCORE_name xddf convert_spot_to_process not }def /map255_to_range { 1 index sub 3 -1 roll 255 div mul add }def /set_csa_crd { /sep_colorspace_dict null AGMCORE_gput begin CSA get_csa_by_name setcolorspace_opt set_crd end } def /map_csa { currentdict/MappedCSA known{MappedCSA null ne}{false}ifelse {pop}{get_csa_by_name/MappedCSA xdf}ifelse }def /setsepcolor { /sep_colorspace_dict AGMCORE_gget begin dup/sep_tint exch AGMCORE_gput TintProc end }def /setdevicencolor { /devicen_colorspace_dict AGMCORE_gget begin Names length copy Names length 1 sub -1 0 { /devicen_tints AGMCORE_gget 3 1 roll xpt }for TintProc end }def /sep_colorspace_proc { /AGMCORE_tmp exch store /sep_colorspace_dict AGMCORE_gget begin currentdict/Components known{ Components aload pop TintMethod/Lab eq{ 2{AGMCORE_tmp mul NComponents 1 roll}repeat LMax sub AGMCORE_tmp mul LMax add NComponents 1 roll }{ TintMethod/Subtractive eq{ NComponents{ AGMCORE_tmp mul NComponents 1 roll }repeat }{ NComponents{ 1 sub AGMCORE_tmp mul 1 add NComponents 1 roll }repeat }ifelse }ifelse }{ ColorLookup AGMCORE_tmp ColorLookup length 1 sub mul round cvi get aload pop }ifelse end }def /sep_colorspace_gray_proc { /AGMCORE_tmp exch store /sep_colorspace_dict AGMCORE_gget begin GrayLookup AGMCORE_tmp GrayLookup length 1 sub mul round cvi get end }def /sep_proc_name { dup 0 get dup/DeviceRGB eq exch/DeviceCMYK eq or level2 not and has_color not and{ 
pop[/DeviceGray] /sep_colorspace_gray_proc }{ /sep_colorspace_proc }ifelse }def /setsepcolorspace { current_spot_alias{ dup begin Name map_alias{ exch pop }if end }if dup/sep_colorspace_dict exch AGMCORE_gput begin CSA map_csa /AGMCORE_sep_special Name dup()eq exch(All)eq or store AGMCORE_avoid_L2_sep_space{ [/Indexed MappedCSA sep_proc_name 255 exch {255 div}/exec cvx 3 -1 roll[4 1 roll load/exec cvx]cvx ]setcolorspace_opt /TintProc{ 255 mul round cvi setcolor }bdf }{ MappedCSA 0 get/DeviceCMYK eq currentdict/Components known and AGMCORE_sep_special not and{ /TintProc[ Components aload pop Name findcmykcustomcolor /exch cvx/setcustomcolor cvx ]cvx bdf }{ AGMCORE_host_sep Name(All)eq and{ /TintProc{ 1 exch sub setseparationgray }bdf }{ AGMCORE_in_rip_sep MappedCSA 0 get/DeviceCMYK eq and AGMCORE_host_sep or Name()eq and{ /TintProc[ MappedCSA sep_proc_name exch 0 get/DeviceCMYK eq{ cvx/setcmykcolor cvx }{ cvx/setgray cvx }ifelse ]cvx bdf }{ AGMCORE_producing_seps MappedCSA 0 get dup/DeviceCMYK eq exch/DeviceGray eq or and AGMCORE_sep_special not and{ /TintProc[ /dup cvx MappedCSA sep_proc_name cvx exch 0 get/DeviceGray eq{ 1/exch cvx/sub cvx 0 0 0 4 -1/roll cvx }if /Name cvx/findcmykcustomcolor cvx/exch cvx AGMCORE_host_sep{ AGMCORE_is_cmyk_sep /Name cvx /AGMCORE_IsSeparationAProcessColor load/exec cvx /not cvx/and cvx }{ Name inRip_spot_has_ink not }ifelse [ /pop cvx 1 ]cvx/if cvx /setcustomcolor cvx ]cvx bdf }{ /TintProc{setcolor}bdf [/Separation Name MappedCSA sep_proc_name load]setcolorspace_opt }ifelse }ifelse }ifelse }ifelse }ifelse set_crd setsepcolor end }def /additive_blend { 3 dict begin /numarrays xdf /numcolors xdf 0 1 numcolors 1 sub { /c1 xdf 1 0 1 numarrays 1 sub { 1 exch add/index cvx c1/get cvx/mul cvx }for numarrays 1 add 1/roll cvx }for numarrays[/pop cvx]cvx/repeat cvx end }def /subtractive_blend { 3 dict begin /numarrays xdf /numcolors xdf 0 1 numcolors 1 sub { /c1 xdf 1 1 0 1 numarrays 1 sub { 1 3 3 -1 roll add/index cvx c1/get cvx/sub cvx/mul 
cvx }for /sub cvx numarrays 1 add 1/roll cvx }for numarrays[/pop cvx]cvx/repeat cvx end }def /exec_tint_transform { /TintProc[ /TintTransform cvx/setcolor cvx ]cvx bdf MappedCSA setcolorspace_opt }bdf /devn_makecustomcolor { 2 dict begin /names_index xdf /Names xdf 1 1 1 1 Names names_index get findcmykcustomcolor /devicen_tints AGMCORE_gget names_index get setcustomcolor Names length{pop}repeat end }bdf /setdevicencolorspace { dup/AliasedColorants known{false}{true}ifelse current_spot_alias and{ 7 dict begin /names_index 0 def dup/names_len exch/Names get length def /new_names names_len array def /new_LookupTables names_len array def /alias_cnt 0 def dup/Names get { dup map_alias{ exch pop dup/ColorLookup known{ dup begin new_LookupTables names_index ColorLookup put end }{ dup/Components known{ dup begin new_LookupTables names_index Components put end }{ dup begin new_LookupTables names_index[null null null null]put end }ifelse }ifelse new_names names_index 3 -1 roll/Name get put /alias_cnt alias_cnt 1 add def }{ /name xdf new_names names_index name put dup/LookupTables known{ dup begin new_LookupTables names_index LookupTables names_index get put end }{ dup begin new_LookupTables names_index[null null null null]put end }ifelse }ifelse /names_index names_index 1 add def }forall alias_cnt 0 gt{ /AliasedColorants true def /lut_entry_len new_LookupTables 0 get dup length 256 ge{0 get length}{length}ifelse def 0 1 names_len 1 sub{ /names_index xdf new_LookupTables names_index get dup length 256 ge{0 get length}{length}ifelse lut_entry_len ne{ /AliasedColorants false def exit }{ new_LookupTables names_index get 0 get null eq{ dup/Names get names_index get/name xdf name(Cyan)eq name(Magenta)eq name(Yellow)eq name(Black)eq or or or not{ /AliasedColorants false def exit }if }if }ifelse }for lut_entry_len 1 eq{ /AliasedColorants false def }if AliasedColorants{ dup begin /Names new_names def /LookupTables new_LookupTables def /AliasedColorants true def /NComponents 
lut_entry_len def /TintMethod NComponents 4 eq{/Subtractive}{/Additive}ifelse def /MappedCSA TintMethod/Additive eq{/DeviceRGB}{/DeviceCMYK}ifelse def currentdict/TTTablesIdx known not{ /TTTablesIdx -1 def }if end }if }if end }if dup/devicen_colorspace_dict exch AGMCORE_gput begin currentdict/AliasedColorants known{ AliasedColorants }{ false }ifelse dup not{ CSA map_csa }if /TintTransform load type/nulltype eq or{ /TintTransform[ 0 1 Names length 1 sub { /TTTablesIdx TTTablesIdx 1 add def dup LookupTables exch get dup 0 get null eq { 1 index Names exch get dup(Cyan)eq { pop exch LookupTables length exch sub /index cvx 0 0 0 } { dup(Magenta)eq { pop exch LookupTables length exch sub /index cvx 0/exch cvx 0 0 }{ (Yellow)eq { exch LookupTables length exch sub /index cvx 0 0 3 -1/roll cvx 0 }{ exch LookupTables length exch sub /index cvx 0 0 0 4 -1/roll cvx }ifelse }ifelse }ifelse 5 -1/roll cvx/astore cvx }{ dup length 1 sub LookupTables length 4 -1 roll sub 1 add /index cvx/mul cvx/round cvx/cvi cvx/get cvx }ifelse Names length TTTablesIdx add 1 add 1/roll cvx }for Names length[/pop cvx]cvx/repeat cvx NComponents Names length TintMethod/Subtractive eq { subtractive_blend }{ additive_blend }ifelse ]cvx bdf }if AGMCORE_host_sep{ Names convert_to_process{ exec_tint_transform } { currentdict/AliasedColorants known{ AliasedColorants not }{ false }ifelse 5 dict begin /AvoidAliasedColorants xdf /painted? false def /names_index 0 def /names_len Names length def AvoidAliasedColorants{ /currentspotalias current_spot_alias def false set_spot_alias }if Names{ AGMCORE_is_cmyk_sep{ dup(Cyan)eq AGMCORE_cyan_plate and exch dup(Magenta)eq AGMCORE_magenta_plate and exch dup(Yellow)eq AGMCORE_yellow_plate and exch (Black)eq AGMCORE_black_plate and or or or{ /devicen_colorspace_dict AGMCORE_gget/TintProc[ Names names_index/devn_makecustomcolor cvx ]cvx ddf /painted? 
true def }if painted?{exit}if }{ 0 0 0 0 5 -1 roll findcmykcustomcolor 1 setcustomcolor currentgray 0 eq{ /devicen_colorspace_dict AGMCORE_gget/TintProc[ Names names_index/devn_makecustomcolor cvx ]cvx ddf /painted? true def exit }if }ifelse /names_index names_index 1 add def }forall AvoidAliasedColorants{ currentspotalias set_spot_alias }if painted?{ /devicen_colorspace_dict AGMCORE_gget/names_index names_index put }{ /devicen_colorspace_dict AGMCORE_gget/TintProc[ names_len[/pop cvx]cvx/repeat cvx 1/setseparationgray cvx 0 0 0 0/setcmykcolor cvx ]cvx ddf }ifelse end }ifelse } { AGMCORE_in_rip_sep{ Names convert_to_process not }{ level3 }ifelse { [/DeviceN Names MappedCSA/TintTransform load]setcolorspace_opt /TintProc level3 not AGMCORE_in_rip_sep and{ [ Names/length cvx[/pop cvx]cvx/repeat cvx ]cvx bdf }{ {setcolor}bdf }ifelse }{ exec_tint_transform }ifelse }ifelse set_crd /AliasedColorants false def end }def /setindexedcolorspace { dup/indexed_colorspace_dict exch AGMCORE_gput begin currentdict/CSDBase known{ CSDBase/CSD get_res begin currentdict/Names known{ currentdict devncs }{ 1 currentdict sepcs }ifelse AGMCORE_host_sep{ 4 dict begin /compCnt/Names where{pop Names length}{1}ifelse def /NewLookup HiVal 1 add string def 0 1 HiVal{ /tableIndex xdf Lookup dup type/stringtype eq{ compCnt tableIndex map_index }{ exec }ifelse /Names where{ pop setdevicencolor }{ setsepcolor }ifelse currentgray tableIndex exch 255 mul cvi NewLookup 3 1 roll put }for [/Indexed currentcolorspace HiVal NewLookup]setcolorspace_opt end }{ level3 { currentdict/Names known{ [/Indexed[/DeviceN Names MappedCSA/TintTransform load]HiVal Lookup]setcolorspace_opt }{ [/Indexed[/Separation Name MappedCSA sep_proc_name load]HiVal Lookup]setcolorspace_opt }ifelse }{ [/Indexed MappedCSA HiVal [ currentdict/Names known{ Lookup dup type/stringtype eq {/exch cvx CSDBase/CSD get_res/Names get length dup/mul cvx exch/getinterval cvx{255 div}/forall cvx} {/exec cvx}ifelse /TintTransform load/exec cvx }{ 
Lookup dup type/stringtype eq {/exch cvx/get cvx 255/div cvx} {/exec cvx}ifelse CSDBase/CSD get_res/MappedCSA get sep_proc_name exch pop/load cvx/exec cvx }ifelse ]cvx ]setcolorspace_opt }ifelse }ifelse end set_crd } { CSA map_csa AGMCORE_host_sep level2 not and{ 0 0 0 0 setcmykcolor }{ [/Indexed MappedCSA level2 not has_color not and{ dup 0 get dup/DeviceRGB eq exch/DeviceCMYK eq or{ pop[/DeviceGray] }if HiVal GrayLookup }{ HiVal currentdict/RangeArray known{ { /indexed_colorspace_dict AGMCORE_gget begin Lookup exch dup HiVal gt{ pop HiVal }if NComponents mul NComponents getinterval{}forall NComponents 1 sub -1 0{ RangeArray exch 2 mul 2 getinterval aload pop map255_to_range NComponents 1 roll }for end }bind }{ Lookup }ifelse }ifelse ]setcolorspace_opt set_crd }ifelse }ifelse end }def /setindexedcolor { AGMCORE_host_sep{ /indexed_colorspace_dict AGMCORE_gget begin currentdict/CSDBase known{ CSDBase/CSD get_res begin currentdict/Names known{ map_indexed_devn devn } { Lookup 1 3 -1 roll map_index sep }ifelse end }{ Lookup MappedCSA/DeviceCMYK eq{4}{1}ifelse 3 -1 roll map_index MappedCSA/DeviceCMYK eq{setcmykcolor}{setgray}ifelse }ifelse end }{ level3 not AGMCORE_in_rip_sep and/indexed_colorspace_dict AGMCORE_gget/CSDBase known and{ /indexed_colorspace_dict AGMCORE_gget/CSDBase get/CSD get_res begin map_indexed_devn devn end } { setcolor }ifelse }ifelse }def /ignoreimagedata { currentoverprint not{ gsave dup clonedict begin 1 setgray /Decode[0 1]def /DataSourcedef /MultipleDataSources false def /BitsPerComponent 8 def currentdict end systemdict/image gx grestore }if consumeimagedata }def /add_res { dup/CSD eq{ pop //Adobe_AGM_Core begin /AGMCORE_CSD_cache load 3 1 roll put end }{ defineresource pop }ifelse }def /del_res { { aload pop exch dup/CSD eq{ pop {//Adobe_AGM_Core/AGMCORE_CSD_cache get exch undef}forall }{ exch {1 index undefineresource}forall pop }ifelse }forall }def /get_res { dup/CSD eq{ pop dup type dup/nametype eq exch/stringtype eq or{ AGMCORE_CSD_cache 
exch get }if }{ findresource }ifelse }def /get_csa_by_name { dup type dup/nametype eq exch/stringtype eq or{ /CSA get_res }if }def /paintproc_buf_init { /count get 0 0 put }def /paintproc_buf_next { dup/count get dup 0 get dup 3 1 roll 1 add 0 xpt get }def /cachepaintproc_compress { 5 dict begin currentfile exch 0 exch/SubFileDecode filter/ReadFilter exch def /ppdict 20 dict def /string_size 16000 def /readbuffer string_size string def currentglobal true setglobal ppdict 1 array dup 0 1 put/count xpt setglobal /LZWFilter { exch dup length 0 eq{ pop }{ ppdict dup length 1 sub 3 -1 roll put }ifelse {string_size}{0}ifelse string }/LZWEncode filter def { ReadFilter readbuffer readstring exch LZWFilter exch writestring not{exit}if }loop LZWFilter closefile ppdict end }def /cachepaintproc { 2 dict begin currentfile exch 0 exch/SubFileDecode filter/ReadFilter exch def /ppdict 20 dict def currentglobal true setglobal ppdict 1 array dup 0 1 put/count xpt setglobal { ReadFilter 16000 string readstring exch ppdict dup length 1 sub 3 -1 roll put not{exit}if }loop ppdict dup dup length 1 sub()put end }def /make_pattern { exch clonedict exch dup matrix currentmatrix matrix concatmatrix 0 0 3 2 roll itransform exch 3 index/XStep get 1 index exch 2 copy div cvi mul sub sub exch 3 index/YStep get 1 index exch 2 copy div cvi mul sub sub matrix translate exch matrix concatmatrix 1 index begin BBox 0 get XStep div cvi XStep mul/xshift exch neg def BBox 1 get YStep div cvi YStep mul/yshift exch neg def BBox 0 get xshift add BBox 1 get yshift add BBox 2 get xshift add BBox 3 get yshift add 4 array astore /BBox exch def [xshift yshift/translate load null/exec load]dup 3/PaintProc load put cvx/PaintProc exch def end gsave 0 setgray makepattern grestore }def /set_pattern { dup/PatternType get 1 eq{ dup/PaintType get 1 eq{ currentoverprint sop[/DeviceGray]setcolorspace 0 setgray }if }if setpattern }def /setcolorspace_opt { dup currentcolorspace eq{pop}{setcolorspace}ifelse }def 
/updatecolorrendering { currentcolorrendering/RenderingIntent known{ currentcolorrendering/RenderingIntent get } { Intent/AbsoluteColorimetric eq { /absolute_colorimetric_crd AGMCORE_gget dup null eq } { Intent/RelativeColorimetric eq { /relative_colorimetric_crd AGMCORE_gget dup null eq } { Intent/Saturation eq { /saturation_crd AGMCORE_gget dup null eq } { /perceptual_crd AGMCORE_gget dup null eq }ifelse }ifelse }ifelse { pop null } { /RenderingIntent known{null}{Intent}ifelse }ifelse }ifelse Intent ne{ Intent/ColorRendering{findresource}stopped { pop pop systemdict/findcolorrendering known { Intent findcolorrendering { /ColorRendering findresource true exch } { /ColorRendering findresource product(Xerox Phaser 5400)ne exch }ifelse dup Intent/AbsoluteColorimetric eq { /absolute_colorimetric_crd exch AGMCORE_gput } { Intent/RelativeColorimetric eq { /relative_colorimetric_crd exch AGMCORE_gput } { Intent/Saturation eq { /saturation_crd exch AGMCORE_gput } { Intent/Perceptual eq { /perceptual_crd exch AGMCORE_gput } { pop }ifelse }ifelse }ifelse }ifelse 1 index{exch}{pop}ifelse } {false}ifelse } {true}ifelse { dup begin currentdict/TransformPQR known{ currentdict/TransformPQR get aload pop 3{{}eq 3 1 roll}repeat or or } {true}ifelse currentdict/MatrixPQR known{ currentdict/MatrixPQR get aload pop 1.0 eq 9 1 roll 0.0 eq 9 1 roll 0.0 eq 9 1 roll 0.0 eq 9 1 roll 1.0 eq 9 1 roll 0.0 eq 9 1 roll 0.0 eq 9 1 roll 0.0 eq 9 1 roll 1.0 eq and and and and and and and and } {true}ifelse end or { clonedict begin /TransformPQR[ {4 -1 roll 3 get dup 3 1 roll sub 5 -1 roll 3 get 3 -1 roll sub div 3 -1 roll 3 get 3 -1 roll 3 get dup 4 1 roll sub mul add}bind {4 -1 roll 4 get dup 3 1 roll sub 5 -1 roll 4 get 3 -1 roll sub div 3 -1 roll 4 get 3 -1 roll 4 get dup 4 1 roll sub mul add}bind {4 -1 roll 5 get dup 3 1 roll sub 5 -1 roll 5 get 3 -1 roll sub div 3 -1 roll 5 get 3 -1 roll 5 get dup 4 1 roll sub mul add}bind ]def /MatrixPQR[0.8951 -0.7502 0.0389 0.2664 1.7135 -0.0685 -0.1614 
0.0367 1.0296]def /RangePQR[-0.3227950745 2.3229645538 -1.5003771057 3.5003465881 -0.1369979095 2.136967392]def currentdict end }if setcolorrendering_opt }if }if }def /set_crd { AGMCORE_host_sep not level2 and{ currentdict/ColorRendering known{ ColorRendering/ColorRendering{findresource}stopped not{setcolorrendering_opt}if }{ currentdict/Intent known{ updatecolorrendering }if }ifelse currentcolorspace dup type/arraytype eq {0 get}if /DeviceRGB eq { currentdict/UCR known {/UCR}{/AGMCORE_currentucr}ifelse load setundercolorremoval currentdict/BG known {/BG}{/AGMCORE_currentbg}ifelse load setblackgeneration }if }if }def /set_ucrbg { dup null eq{pop/AGMCORE_currentbg load}{/Procedure get_res}ifelse setblackgeneration dup null eq{pop/AGMCORE_currentucr load}{/Procedure get_res}ifelse setundercolorremoval }def /setcolorrendering_opt { dup currentcolorrendering eq{ pop }{ product(HP Color LaserJet 2605)anchorsearch{ pop pop pop }{ pop clonedict begin /Intent Intent def currentdict end setcolorrendering }ifelse }ifelse }def /cpaint_gcomp { convert_to_process//Adobe_AGM_Core/AGMCORE_ConvertToProcess xddf //Adobe_AGM_Core/AGMCORE_ConvertToProcess get not { (%end_cpaint_gcomp)flushinput }if }def /cpaint_gsep { //Adobe_AGM_Core/AGMCORE_ConvertToProcess get { (%end_cpaint_gsep)flushinput }if }def /cpaint_gend {np}def /T1_path { currentfile token pop currentfile token pop mo { currentfile token pop dup type/stringtype eq {pop exit}if 0 exch rlineto currentfile token pop dup type/stringtype eq {pop exit}if 0 rlineto }loop }def /T1_gsave level3 {/clipsave} {/gsave}ifelse load def /T1_grestore level3 {/cliprestore} {/grestore}ifelse load def /set_spot_alias_ary { dup inherit_aliases //Adobe_AGM_Core/AGMCORE_SpotAliasAry xddf }def /set_spot_normalization_ary { dup inherit_aliases dup length /AGMCORE_SpotAliasAry where{pop AGMCORE_SpotAliasAry length add}if array //Adobe_AGM_Core/AGMCORE_SpotAliasAry2 xddf /AGMCORE_SpotAliasAry where{ pop AGMCORE_SpotAliasAry2 0 AGMCORE_SpotAliasAry 
putinterval AGMCORE_SpotAliasAry length }{0}ifelse AGMCORE_SpotAliasAry2 3 1 roll exch putinterval true set_spot_alias }def /inherit_aliases { {dup/Name get map_alias{/CSD put}{pop}ifelse}forall }def /set_spot_alias { /AGMCORE_SpotAliasAry2 where{ /AGMCORE_current_spot_alias 3 -1 roll put }{ pop }ifelse }def /current_spot_alias { /AGMCORE_SpotAliasAry2 where{ /AGMCORE_current_spot_alias get }{ false }ifelse }def /map_alias { /AGMCORE_SpotAliasAry2 where{ begin /AGMCORE_name xdf false AGMCORE_SpotAliasAry2{ dup/Name get AGMCORE_name eq{ /CSD get/CSD get_res exch pop true exit }{ pop }ifelse }forall end }{ pop false }ifelse }bdf /spot_alias { true set_spot_alias /AGMCORE_&setcustomcolor AGMCORE_key_known not{ //Adobe_AGM_Core/AGMCORE_&setcustomcolor/setcustomcolor load put }if /customcolor_tint 1 AGMCORE_gput //Adobe_AGM_Core begin /setcustomcolor { //Adobe_AGM_Core begin dup/customcolor_tint exch AGMCORE_gput 1 index aload pop pop 1 eq exch 1 eq and exch 1 eq and exch 1 eq and not current_spot_alias and{1 index 4 get map_alias}{false}ifelse { false set_spot_alias /sep_colorspace_dict AGMCORE_gget null ne {/sep_colorspace_dict AGMCORE_gget/ForeignContent known not}{false}ifelse 3 1 roll 2 index{ exch pop/sep_tint AGMCORE_gget exch }if mark 3 1 roll setsepcolorspace counttomark 0 ne{ setsepcolor }if pop not{/sep_tint 1.0 AGMCORE_gput/sep_colorspace_dict AGMCORE_gget/ForeignContent true put}if pop true set_spot_alias }{ AGMCORE_&setcustomcolor }ifelse end }bdf end }def /begin_feature { Adobe_AGM_Core/AGMCORE_feature_dictCount countdictstack put count Adobe_AGM_Core/AGMCORE_feature_opCount 3 -1 roll put {Adobe_AGM_Core/AGMCORE_feature_ctm matrix currentmatrix put}if }def /end_feature { 2 dict begin /spd/setpagedevice load def /setpagedevice{get_gstate spd set_gstate}def stopped{$error/newerror false put}if end count Adobe_AGM_Core/AGMCORE_feature_opCount get sub dup 0 gt{{pop}repeat}{pop}ifelse countdictstack Adobe_AGM_Core/AGMCORE_feature_dictCount get sub dup 0 
gt{{end}repeat}{pop}ifelse {Adobe_AGM_Core/AGMCORE_feature_ctm get setmatrix}if }def /set_negative { //Adobe_AGM_Core begin /AGMCORE_inverting exch def level2{ currentpagedevice/NegativePrint known AGMCORE_distilling not and{ currentpagedevice/NegativePrint get//Adobe_AGM_Core/AGMCORE_inverting get ne{ true begin_feature true{ <>setpagedevice }end_feature }if /AGMCORE_inverting false def }if }if AGMCORE_inverting{ [{1 exch sub}/exec load dup currenttransfer exch]cvx bind settransfer AGMCORE_distilling{ erasepage }{ gsave np clippath 1/setseparationgray where{pop setseparationgray}{setgray}ifelse /AGMIRS_&fill where{pop AGMIRS_&fill}{fill}ifelse grestore }ifelse }if end }def /lw_save_restore_override{ /md where{ pop md begin initializepage /initializepage{}def /pmSVsetup{}def /endp{}def /pse{}def /psb{}def /orig_showpage where {pop} {/orig_showpage/showpage load def} ifelse /showpage{orig_showpage gR}def end }if }def /pscript_showpage_override{ /NTPSOct95 where { begin showpage save /showpage/restore load def /restore{exch pop}def end }if }def /driver_media_override { /md where{ pop md/initializepage known{ md/initializepage{}put }if md/rC known{ md/rC{4{pop}repeat}put }if }if /mysetup where{ /mysetup[1 0 0 1 0 0]put }if Adobe_AGM_Core/AGMCORE_Default_CTM matrix currentmatrix put level2 {Adobe_AGM_Core/AGMCORE_Default_PageSize currentpagedevice/PageSize get put}if }def /capture_mysetup { /Pscript_Win_Data where{ pop Pscript_Win_Data/mysetup known{ Adobe_AGM_Core/save_mysetup Pscript_Win_Data/mysetup get put }if }if }def /restore_mysetup { /Pscript_Win_Data where{ pop Pscript_Win_Data/mysetup known{ Adobe_AGM_Core/save_mysetup known{ Pscript_Win_Data/mysetup Adobe_AGM_Core/save_mysetup get put Adobe_AGM_Core/save_mysetup undef }if }if }if }def /driver_check_media_override { /PrepsDict where {pop} { Adobe_AGM_Core/AGMCORE_Default_CTM get matrix currentmatrix ne Adobe_AGM_Core/AGMCORE_Default_PageSize get type/arraytype eq { Adobe_AGM_Core/AGMCORE_Default_PageSize get 
0 get currentpagedevice/PageSize get 0 get eq and Adobe_AGM_Core/AGMCORE_Default_PageSize get 1 get currentpagedevice/PageSize get 1 get eq and }if { Adobe_AGM_Core/AGMCORE_Default_CTM get setmatrix }if }ifelse }def AGMCORE_err_strings begin /AGMCORE_bad_environ(Environment not satisfactory for this job. Ensure that the PPD is correct or that the PostScript level requested is supported by this printer. )def /AGMCORE_color_space_onhost_seps(This job contains colors that will not separate with on-host methods. )def /AGMCORE_invalid_color_space(This job contains an invalid color space. )def end /set_def_ht {AGMCORE_def_ht sethalftone}def /set_def_flat {AGMCORE_Default_flatness setflat}def end systemdict/setpacking known {setpacking}if %%EndResource %%BeginResource: procset Adobe_CoolType_Core 2.31 0 %%Copyright: Copyright 1997-2006 Adobe Systems Incorporated. All Rights Reserved. %%Version: 2.31 0 10 dict begin /Adobe_CoolType_Passthru currentdict def /Adobe_CoolType_Core_Defined userdict/Adobe_CoolType_Core known def Adobe_CoolType_Core_Defined {/Adobe_CoolType_Core userdict/Adobe_CoolType_Core get def} if userdict/Adobe_CoolType_Core 70 dict dup begin put /Adobe_CoolType_Version 2.31 def /Level2? systemdict/languagelevel known dup {pop systemdict/languagelevel get 2 ge} if def Level2? 
not { /currentglobal false def /setglobal/pop load def /gcheck{pop false}bind def /currentpacking false def /setpacking/pop load def /SharedFontDirectory 0 dict def } if currentpacking true setpacking currentglobal false setglobal userdict/Adobe_CoolType_Data 2 copy known not {2 copy 10 dict put} if get begin /@opStackCountByLevel 32 dict def /@opStackLevel 0 def /@dictStackCountByLevel 32 dict def /@dictStackLevel 0 def end setglobal currentglobal true setglobal userdict/Adobe_CoolType_GVMFonts known not {userdict/Adobe_CoolType_GVMFonts 10 dict put} if setglobal currentglobal false setglobal userdict/Adobe_CoolType_LVMFonts known not {userdict/Adobe_CoolType_LVMFonts 10 dict put} if setglobal /ct_VMDictPut { dup gcheck{Adobe_CoolType_GVMFonts}{Adobe_CoolType_LVMFonts}ifelse 3 1 roll put }bind def /ct_VMDictUndef { dup Adobe_CoolType_GVMFonts exch known {Adobe_CoolType_GVMFonts exch undef} { dup Adobe_CoolType_LVMFonts exch known {Adobe_CoolType_LVMFonts exch undef} {pop} ifelse }ifelse }bind def /ct_str1 1 string def /ct_xshow { /_ct_na exch def /_ct_i 0 def currentpoint /_ct_y exch def /_ct_x exch def { pop pop ct_str1 exch 0 exch put ct_str1 show {_ct_na _ct_i get}stopped {pop pop} { _ct_x _ct_y moveto 0 rmoveto } ifelse /_ct_i _ct_i 1 add def currentpoint /_ct_y exch def /_ct_x exch def } exch @cshow }bind def /ct_yshow { /_ct_na exch def /_ct_i 0 def currentpoint /_ct_y exch def /_ct_x exch def { pop pop ct_str1 exch 0 exch put ct_str1 show {_ct_na _ct_i get}stopped {pop pop} { _ct_x _ct_y moveto 0 exch rmoveto } ifelse /_ct_i _ct_i 1 add def currentpoint /_ct_y exch def /_ct_x exch def } exch @cshow }bind def /ct_xyshow { /_ct_na exch def /_ct_i 0 def currentpoint /_ct_y exch def /_ct_x exch def { pop pop ct_str1 exch 0 exch put ct_str1 show {_ct_na _ct_i get}stopped {pop pop} { {_ct_na _ct_i 1 add get}stopped {pop pop pop} { _ct_x _ct_y moveto rmoveto } ifelse } ifelse /_ct_i _ct_i 2 add def currentpoint /_ct_y exch def /_ct_x exch def } exch @cshow }bind 
def /xsh{{@xshow}stopped{Adobe_CoolType_Data begin ct_xshow end}if}bind def /ysh{{@yshow}stopped{Adobe_CoolType_Data begin ct_yshow end}if}bind def /xysh{{@xyshow}stopped{Adobe_CoolType_Data begin ct_xyshow end}if}bind def currentglobal true setglobal /ct_T3Defs { /BuildChar { 1 index/Encoding get exch get 1 index/BuildGlyph get exec }bind def /BuildGlyph { exch begin GlyphProcs exch get exec end }bind def }bind def setglobal /@_SaveStackLevels { Adobe_CoolType_Data begin /@vmState currentglobal def false setglobal @opStackCountByLevel @opStackLevel 2 copy known not { 2 copy 3 dict dup/args 7 index 5 add array put put get } { get dup/args get dup length 3 index lt { dup length 5 add array exch 1 index exch 0 exch putinterval 1 index exch/args exch put } {pop} ifelse } ifelse begin count 1 sub 1 index lt {pop count} if dup/argCount exch def dup 0 gt { args exch 0 exch getinterval astore pop } {pop} ifelse count /restCount exch def end /@opStackLevel @opStackLevel 1 add def countdictstack 1 sub @dictStackCountByLevel exch @dictStackLevel exch put /@dictStackLevel @dictStackLevel 1 add def @vmState setglobal end }bind def /@_RestoreStackLevels { Adobe_CoolType_Data begin /@opStackLevel @opStackLevel 1 sub def @opStackCountByLevel @opStackLevel get begin count restCount sub dup 0 gt {{pop}repeat} {pop} ifelse args 0 argCount getinterval{}forall end /@dictStackLevel @dictStackLevel 1 sub def @dictStackCountByLevel @dictStackLevel get end countdictstack exch sub dup 0 gt {{end}repeat} {pop} ifelse }bind def /@_PopStackLevels { Adobe_CoolType_Data begin /@opStackLevel @opStackLevel 1 sub def /@dictStackLevel @dictStackLevel 1 sub def end }bind def /@Raise { exch cvx exch errordict exch get exec stop }bind def /@ReRaise { cvx $error/errorname get errordict exch get exec stop }bind def /@Stopped { 0 @#Stopped }bind def /@#Stopped { @_SaveStackLevels stopped {@_RestoreStackLevels true} {@_PopStackLevels false} ifelse }bind def /@Arg { Adobe_CoolType_Data begin 
@opStackCountByLevel @opStackLevel 1 sub get begin args exch argCount 1 sub exch sub get end end }bind def currentglobal true setglobal /CTHasResourceForAllBug Level2? { 1 dict dup /@shouldNotDisappearDictValue true def Adobe_CoolType_Data exch/@shouldNotDisappearDict exch put begin count @_SaveStackLevels {(*){pop stop}128 string/Category resourceforall} stopped pop @_RestoreStackLevels currentdict Adobe_CoolType_Data/@shouldNotDisappearDict get dup 3 1 roll ne dup 3 1 roll { /@shouldNotDisappearDictValue known { { end currentdict 1 index eq {pop exit} if } loop } if } { pop end } ifelse } {false} ifelse def true setglobal /CTHasResourceStatusBug Level2? { mark {/steveamerige/Category resourcestatus} stopped {cleartomark true} {cleartomark currentglobal not} ifelse } {false} ifelse def setglobal /CTResourceStatus { mark 3 1 roll /Category findresource begin ({ResourceStatus}stopped)0()/SubFileDecode filter cvx exec {cleartomark false} {{3 2 roll pop true}{cleartomark false}ifelse} ifelse end }bind def /CTWorkAroundBugs { Level2? 
{ /cid_PreLoad/ProcSet resourcestatus { pop pop currentglobal mark { (*) { dup/CMap CTHasResourceStatusBug {CTResourceStatus} {resourcestatus} ifelse { pop dup 0 eq exch 1 eq or { dup/CMap findresource gcheck setglobal /CMap undefineresource } { pop CTHasResourceForAllBug {exit} {stop} ifelse } ifelse } {pop} ifelse } 128 string/CMap resourceforall } stopped {cleartomark} stopped pop setglobal } if } if }bind def /ds { Adobe_CoolType_Core begin CTWorkAroundBugs /mo/moveto load def /nf/newencodedfont load def /msf{makefont setfont}bind def /uf{dup undefinefont ct_VMDictUndef}bind def /ur/undefineresource load def /chp/charpath load def /awsh/awidthshow load def /wsh/widthshow load def /ash/ashow load def /@xshow/xshow load def /@yshow/yshow load def /@xyshow/xyshow load def /@cshow/cshow load def /sh/show load def /rp/repeat load def /.n/.notdef def end currentglobal false setglobal userdict/Adobe_CoolType_Data 2 copy known not {2 copy 10 dict put} if get begin /AddWidths? false def /CC 0 def /charcode 2 string def /@opStackCountByLevel 32 dict def /@opStackLevel 0 def /@dictStackCountByLevel 32 dict def /@dictStackLevel 0 def /InVMFontsByCMap 10 dict def /InVMDeepCopiedFonts 10 dict def end setglobal }bind def /dt { currentdict Adobe_CoolType_Core eq {end} if }bind def /ps { Adobe_CoolType_Core begin Adobe_CoolType_GVMFonts begin Adobe_CoolType_LVMFonts begin SharedFontDirectory begin }bind def /pt { end end end end }bind def /unload { systemdict/languagelevel known { systemdict/languagelevel get 2 ge { userdict/Adobe_CoolType_Core 2 copy known {undef} {pop pop} ifelse } if } if }bind def /ndf { 1 index where {pop pop pop} {dup xcheck{bind}if def} ifelse }def /findfont systemdict begin userdict begin /globaldict where{/globaldict get begin}if dup where pop exch get /globaldict where{pop end}if end end Adobe_CoolType_Core_Defined {/systemfindfont exch def} { /findfont 1 index def /systemfindfont exch def } ifelse /undefinefont {pop}ndf /copyfont { currentglobal 3 1 
roll 1 index gcheck setglobal dup null eq{0}{dup length}ifelse 2 index length add 1 add dict begin exch { 1 index/FID eq {pop pop} {def} ifelse } forall dup null eq {pop} {{def}forall} ifelse currentdict end exch setglobal }bind def /copyarray { currentglobal exch dup gcheck setglobal dup length array copy exch setglobal }bind def /newencodedfont { currentglobal { SharedFontDirectory 3 index known {SharedFontDirectory 3 index get/FontReferenced known} {false} ifelse } { FontDirectory 3 index known {FontDirectory 3 index get/FontReferenced known} { SharedFontDirectory 3 index known {SharedFontDirectory 3 index get/FontReferenced known} {false} ifelse } ifelse } ifelse dup { 3 index findfont/FontReferenced get 2 index dup type/nametype eq {findfont} if ne {pop false} if } if dup { 1 index dup type/nametype eq {findfont} if dup/CharStrings known { /CharStrings get length 4 index findfont/CharStrings get length ne { pop false } if } {pop} ifelse } if { pop 1 index findfont /Encoding get exch 0 1 255 {2 copy get 3 index 3 1 roll put} for pop pop pop } { currentglobal 4 1 roll dup type/nametype eq {findfont} if dup gcheck setglobal dup dup maxlength 2 add dict begin exch { 1 index/FID ne 2 index/Encoding ne and {def} {pop pop} ifelse } forall /FontReferenced exch def /Encoding exch dup length array copy def /FontName 1 index dup type/stringtype eq{cvn}if def dup currentdict end definefont ct_VMDictPut setglobal } ifelse }bind def /SetSubstituteStrategy { $SubstituteFont begin dup type/dicttype ne {0 dict} if currentdict/$Strategies known { exch $Strategies exch 2 copy known { get 2 copy maxlength exch maxlength add dict begin {def}forall {def}forall currentdict dup/$Init known {dup/$Init get exec} if end /$Strategy exch def } {pop pop pop} ifelse } {pop pop} ifelse end }bind def /scff { $SubstituteFont begin dup type/stringtype eq {dup length exch} {null} ifelse /$sname exch def /$slen exch def /$inVMIndex $sname null eq { 1 index $str cvs dup length $slen sub $slen 
getinterval cvn } {$sname} ifelse def end {findfont} @Stopped { dup length 8 add string exch 1 index 0(BadFont:)putinterval 1 index exch 8 exch dup length string cvs putinterval cvn {findfont} @Stopped {pop/Courier findfont} if } if $SubstituteFont begin /$sname null def /$slen 0 def /$inVMIndex null def end }bind def /isWidthsOnlyFont { dup/WidthsOnly known {pop pop true} { dup/FDepVector known {/FDepVector get{isWidthsOnlyFont dup{exit}if}forall} { dup/FDArray known {/FDArray get{isWidthsOnlyFont dup{exit}if}forall} {pop} ifelse } ifelse } ifelse }bind def /ct_StyleDicts 4 dict dup begin /Adobe-Japan1 4 dict dup begin Level2? { /Serif /HeiseiMin-W3-83pv-RKSJ-H/Font resourcestatus {pop pop/HeiseiMin-W3} { /CIDFont/Category resourcestatus { pop pop /HeiseiMin-W3/CIDFont resourcestatus {pop pop/HeiseiMin-W3} {/Ryumin-Light} ifelse } {/Ryumin-Light} ifelse } ifelse def /SansSerif /HeiseiKakuGo-W5-83pv-RKSJ-H/Font resourcestatus {pop pop/HeiseiKakuGo-W5} { /CIDFont/Category resourcestatus { pop pop /HeiseiKakuGo-W5/CIDFont resourcestatus {pop pop/HeiseiKakuGo-W5} {/GothicBBB-Medium} ifelse } {/GothicBBB-Medium} ifelse } ifelse def /HeiseiMaruGo-W4-83pv-RKSJ-H/Font resourcestatus {pop pop/HeiseiMaruGo-W4} { /CIDFont/Category resourcestatus { pop pop /HeiseiMaruGo-W4/CIDFont resourcestatus {pop pop/HeiseiMaruGo-W4} { /Jun101-Light-RKSJ-H/Font resourcestatus {pop pop/Jun101-Light} {SansSerif} ifelse } ifelse } { /Jun101-Light-RKSJ-H/Font resourcestatus {pop pop/Jun101-Light} {SansSerif} ifelse } ifelse } ifelse /RoundSansSerif exch def /Default Serif def } { /Serif/Ryumin-Light def /SansSerif/GothicBBB-Medium def { (fonts/Jun101-Light-83pv-RKSJ-H)status }stopped {pop}{ {pop pop pop pop/Jun101-Light} {SansSerif} ifelse /RoundSansSerif exch def }ifelse /Default Serif def } ifelse end def /Adobe-Korea1 4 dict dup begin /Serif/HYSMyeongJo-Medium def /SansSerif/HYGoThic-Medium def /RoundSansSerif SansSerif def /Default Serif def end def /Adobe-GB1 4 dict dup begin 
/Serif/STSong-Light def /SansSerif/STHeiti-Regular def /RoundSansSerif SansSerif def /Default Serif def end def /Adobe-CNS1 4 dict dup begin /Serif/MKai-Medium def /SansSerif/MHei-Medium def /RoundSansSerif SansSerif def /Default Serif def end def end def Level2?{currentglobal true setglobal}if /ct_BoldRomanWidthProc { stringwidth 1 index 0 ne{exch .03 add exch}if setcharwidth 0 0 }bind def /ct_Type0WidthProc { dup stringwidth 0 0 moveto 2 index true charpath pathbbox 0 -1 7 index 2 div .88 setcachedevice2 pop 0 0 }bind def /ct_Type0WMode1WidthProc { dup stringwidth pop 2 div neg -0.88 2 copy moveto 0 -1 5 -1 roll true charpath pathbbox setcachedevice }bind def /cHexEncoding [/c00/c01/c02/c03/c04/c05/c06/c07/c08/c09/c0A/c0B/c0C/c0D/c0E/c0F/c10/c11/c12 /c13/c14/c15/c16/c17/c18/c19/c1A/c1B/c1C/c1D/c1E/c1F/c20/c21/c22/c23/c24/c25 /c26/c27/c28/c29/c2A/c2B/c2C/c2D/c2E/c2F/c30/c31/c32/c33/c34/c35/c36/c37/c38 /c39/c3A/c3B/c3C/c3D/c3E/c3F/c40/c41/c42/c43/c44/c45/c46/c47/c48/c49/c4A/c4B /c4C/c4D/c4E/c4F/c50/c51/c52/c53/c54/c55/c56/c57/c58/c59/c5A/c5B/c5C/c5D/c5E /c5F/c60/c61/c62/c63/c64/c65/c66/c67/c68/c69/c6A/c6B/c6C/c6D/c6E/c6F/c70/c71 /c72/c73/c74/c75/c76/c77/c78/c79/c7A/c7B/c7C/c7D/c7E/c7F/c80/c81/c82/c83/c84 /c85/c86/c87/c88/c89/c8A/c8B/c8C/c8D/c8E/c8F/c90/c91/c92/c93/c94/c95/c96/c97 /c98/c99/c9A/c9B/c9C/c9D/c9E/c9F/cA0/cA1/cA2/cA3/cA4/cA5/cA6/cA7/cA8/cA9/cAA /cAB/cAC/cAD/cAE/cAF/cB0/cB1/cB2/cB3/cB4/cB5/cB6/cB7/cB8/cB9/cBA/cBB/cBC/cBD /cBE/cBF/cC0/cC1/cC2/cC3/cC4/cC5/cC6/cC7/cC8/cC9/cCA/cCB/cCC/cCD/cCE/cCF/cD0 /cD1/cD2/cD3/cD4/cD5/cD6/cD7/cD8/cD9/cDA/cDB/cDC/cDD/cDE/cDF/cE0/cE1/cE2/cE3 /cE4/cE5/cE6/cE7/cE8/cE9/cEA/cEB/cEC/cED/cEE/cEF/cF0/cF1/cF2/cF3/cF4/cF5/cF6 /cF7/cF8/cF9/cFA/cFB/cFC/cFD/cFE/cFF]def /ct_BoldBaseFont 11 dict begin /FontType 3 def /FontMatrix[1 0 0 1 0 0]def /FontBBox[0 0 1 1]def /Encoding cHexEncoding def /_setwidthProc/ct_BoldRomanWidthProc load def /_bcstr1 1 string def /BuildChar { exch begin _basefont setfont _bcstr1 dup 0 4 -1 roll put dup 
_setwidthProc 3 copy moveto show _basefonto setfont moveto show end }bind def currentdict end def systemdict/composefont known { /ct_DefineIdentity-H { /Identity-H/CMap resourcestatus { pop pop } { /CIDInit/ProcSet findresource begin 12 dict begin begincmap /CIDSystemInfo 3 dict dup begin /Registry(Adobe)def /Ordering(Identity)def /Supplement 0 def end def /CMapName/Identity-H def /CMapVersion 1.000 def /CMapType 1 def 1 begincodespacerange <0000> endcodespacerange 1 begincidrange <0000>0 endcidrange endcmap CMapName currentdict/CMap defineresource pop end end } ifelse } def /ct_BoldBaseCIDFont 11 dict begin /CIDFontType 1 def /CIDFontName/ct_BoldBaseCIDFont def /FontMatrix[1 0 0 1 0 0]def /FontBBox[0 0 1 1]def /_setwidthProc/ct_Type0WidthProc load def /_bcstr2 2 string def /BuildGlyph { exch begin _basefont setfont _bcstr2 1 2 index 256 mod put _bcstr2 0 3 -1 roll 256 idiv put _bcstr2 dup _setwidthProc 3 copy moveto show _basefonto setfont moveto show end }bind def currentdict end def }if Level2?{setglobal}if /ct_CopyFont{ { 1 index/FID ne 2 index/UniqueID ne and {def}{pop pop}ifelse }forall }bind def /ct_Type0CopyFont { exch dup length dict begin ct_CopyFont [ exch FDepVector { dup/FontType get 0 eq { 1 index ct_Type0CopyFont /_ctType0 exch definefont } { /_ctBaseFont exch 2 index exec } ifelse exch } forall pop ] /FDepVector exch def currentdict end }bind def /ct_MakeBoldFont { dup/ct_SyntheticBold known { dup length 3 add dict begin ct_CopyFont /ct_StrokeWidth .03 0 FontMatrix idtransform pop def /ct_SyntheticBold true def currentdict end definefont } { dup dup length 3 add dict begin ct_CopyFont /PaintType 2 def /StrokeWidth .03 0 FontMatrix idtransform pop def /dummybold currentdict end definefont dup/FontType get dup 9 ge exch 11 le and { ct_BoldBaseCIDFont dup length 3 add dict copy begin dup/CIDSystemInfo get/CIDSystemInfo exch def ct_DefineIdentity-H /_Type0Identity/Identity-H 3 -1 roll[exch]composefont /_basefont exch def /_Type0Identity/Identity-H 3 -1 
roll[exch]composefont /_basefonto exch def currentdict end /CIDFont defineresource } { ct_BoldBaseFont dup length 3 add dict copy begin /_basefont exch def /_basefonto exch def currentdict end definefont } ifelse } ifelse }bind def /ct_MakeBold{ 1 index 1 index findfont currentglobal 5 1 roll dup gcheck setglobal dup /FontType get 0 eq { dup/WMode known{dup/WMode get 1 eq}{false}ifelse version length 4 ge and {version 0 4 getinterval cvi 2015 ge} {true} ifelse {/ct_Type0WidthProc} {/ct_Type0WMode1WidthProc} ifelse ct_BoldBaseFont/_setwidthProc 3 -1 roll load put {ct_MakeBoldFont}ct_Type0CopyFont definefont } { dup/_fauxfont known not 1 index/SubstMaster known not and { ct_BoldBaseFont/_setwidthProc /ct_BoldRomanWidthProc load put ct_MakeBoldFont } { 2 index 2 index eq {exch pop } { dup length dict begin ct_CopyFont currentdict end definefont } ifelse } ifelse } ifelse pop pop pop setglobal }bind def /?str1 256 string def /?set { $SubstituteFont begin /$substituteFound false def /$fontname 1 index def /$doSmartSub false def end dup findfont $SubstituteFont begin $substituteFound {false} { dup/FontName known { dup/FontName get $fontname eq 1 index/DistillerFauxFont known not and /currentdistillerparams where {pop false 2 index isWidthsOnlyFont not and} if } {false} ifelse } ifelse exch pop /$doSmartSub true def end { 5 1 roll pop pop pop pop findfont } { 1 index findfont dup/FontType get 3 eq { 6 1 roll pop pop pop pop pop false } {pop true} ifelse { $SubstituteFont begin pop pop /$styleArray 1 index def /$regOrdering 2 index def pop pop 0 1 $styleArray length 1 sub { $styleArray exch get ct_StyleDicts $regOrdering 2 copy known { get exch 2 copy known not {pop/Default} if get dup type/nametype eq { ?str1 cvs length dup 1 add exch ?str1 exch(-)putinterval exch dup length exch ?str1 exch 3 index exch putinterval add ?str1 exch 0 exch getinterval cvn } { pop pop/Unknown } ifelse } { pop pop pop pop/Unknown } ifelse } for end findfont }if } ifelse currentglobal false 
setglobal 3 1 roll null copyfont definefont pop setglobal }bind def setpacking userdict/$SubstituteFont 25 dict put 1 dict begin /SubstituteFont dup $error exch 2 copy known {get} {pop pop{pop/Courier}bind} ifelse def /currentdistillerparams where dup { pop pop currentdistillerparams/CannotEmbedFontPolicy 2 copy known {get/Error eq} {pop pop false} ifelse } if not { countdictstack array dictstack 0 get begin userdict begin $SubstituteFont begin /$str 128 string def /$fontpat 128 string def /$slen 0 def /$sname null def /$match false def /$fontname null def /$substituteFound false def /$inVMIndex null def /$doSmartSub true def /$depth 0 def /$fontname null def /$italicangle 26.5 def /$dstack null def /$Strategies 10 dict dup begin /$Type3Underprint { currentglobal exch false setglobal 11 dict begin /UseFont exch $WMode 0 ne { dup length dict copy dup/WMode $WMode put /UseFont exch definefont } if def /FontName $fontname dup type/stringtype eq{cvn}if def /FontType 3 def /FontMatrix[.001 0 0 .001 0 0]def /Encoding 256 array dup 0 1 255{/.notdef put dup}for pop def /FontBBox[0 0 0 0]def /CCInfo 7 dict dup begin /cc null def /x 0 def /y 0 def end def /BuildChar { exch begin CCInfo begin 1 string dup 0 3 index put exch pop /cc exch def UseFont 1000 scalefont setfont cc stringwidth/y exch def/x exch def x y setcharwidth $SubstituteFont/$Strategy get/$Underprint get exec 0 0 moveto cc show x y moveto end end }bind def currentdict end exch setglobal }bind def /$GetaTint 2 dict dup begin /$BuildFont { dup/WMode known {dup/WMode get} {0} ifelse /$WMode exch def $fontname exch dup/FontName known { dup/FontName get dup type/stringtype eq{cvn}if } {/unnamedfont} ifelse exch Adobe_CoolType_Data/InVMDeepCopiedFonts get 1 index/FontName get known { pop Adobe_CoolType_Data/InVMDeepCopiedFonts get 1 index get null copyfont } {$deepcopyfont} ifelse exch 1 index exch/FontBasedOn exch put dup/FontName $fontname dup type/stringtype eq{cvn}if put definefont 
Adobe_CoolType_Data/InVMDeepCopiedFonts get begin dup/FontBasedOn get 1 index def end }bind def /$Underprint { gsave x abs y abs gt {/y 1000 def} {/x -1000 def 500 120 translate} ifelse Level2? { [/Separation(All)/DeviceCMYK{0 0 0 1 pop}] setcolorspace } {0 setgray} ifelse 10 setlinewidth x .8 mul [7 3] { y mul 8 div 120 sub x 10 div exch moveto 0 y 4 div neg rlineto dup 0 rlineto 0 y 4 div rlineto closepath gsave Level2? {.2 setcolor} {.8 setgray} ifelse fill grestore stroke } forall pop grestore }bind def end def /$Oblique 1 dict dup begin /$BuildFont { currentglobal exch dup gcheck setglobal null copyfont begin /FontBasedOn currentdict/FontName known { FontName dup type/stringtype eq{cvn}if } {/unnamedfont} ifelse def /FontName $fontname dup type/stringtype eq{cvn}if def /currentdistillerparams where {pop} { /FontInfo currentdict/FontInfo known {FontInfo null copyfont} {2 dict} ifelse dup begin /ItalicAngle $italicangle def /FontMatrix FontMatrix [1 0 ItalicAngle dup sin exch cos div 1 0 0] matrix concatmatrix readonly end 4 2 roll def def } ifelse FontName currentdict end definefont exch setglobal }bind def end def /$None 1 dict dup begin /$BuildFont{}bind def end def end def /$Oblique SetSubstituteStrategy /$findfontByEnum { dup type/stringtype eq{cvn}if dup/$fontname exch def $sname null eq {$str cvs dup length $slen sub $slen getinterval} {pop $sname} ifelse $fontpat dup 0(fonts/*)putinterval exch 7 exch putinterval /$match false def $SubstituteFont/$dstack countdictstack array dictstack put mark { $fontpat 0 $slen 7 add getinterval {/$match exch def exit} $str filenameforall } stopped { cleardictstack currentdict true $SubstituteFont/$dstack get { exch { 1 index eq {pop false} {true} ifelse } {begin false} ifelse } forall pop } if cleartomark /$slen 0 def $match false ne {$match(fonts/)anchorsearch pop pop cvn} {/Courier} ifelse }bind def /$ROS 1 dict dup begin /Adobe 4 dict dup begin /Japan1 [/Ryumin-Light/HeiseiMin-W3 /GothicBBB-Medium/HeiseiKakuGo-W5 
/HeiseiMaruGo-W4/Jun101-Light]def /Korea1 [/HYSMyeongJo-Medium/HYGoThic-Medium]def /GB1 [/STSong-Light/STHeiti-Regular]def /CNS1 [/MKai-Medium/MHei-Medium]def end def end def /$cmapname null def /$deepcopyfont { dup/FontType get 0 eq { 1 dict dup/FontName/copied put copyfont begin /FDepVector FDepVector copyarray 0 1 2 index length 1 sub { 2 copy get $deepcopyfont dup/FontName/copied put /copied exch definefont 3 copy put pop pop } for def currentdict end } {$Strategies/$Type3Underprint get exec} ifelse }bind def /$buildfontname { dup/CIDFont findresource/CIDSystemInfo get begin Registry length Ordering length Supplement 8 string cvs 3 copy length 2 add add add string dup 5 1 roll dup 0 Registry putinterval dup 4 index(-)putinterval dup 4 index 1 add Ordering putinterval 4 2 roll add 1 add 2 copy(-)putinterval end 1 add 2 copy 0 exch getinterval $cmapname $fontpat cvs exch anchorsearch {pop pop 3 2 roll putinterval cvn/$cmapname exch def} {pop pop pop pop pop} ifelse length $str 1 index(-)putinterval 1 add $str 1 index $cmapname $fontpat cvs putinterval $cmapname length add $str exch 0 exch getinterval cvn }bind def /$findfontByROS { /$fontname exch def $ROS Registry 2 copy known { get Ordering 2 copy known {get} {pop pop[]} ifelse } {pop pop[]} ifelse false exch { dup/CIDFont resourcestatus { pop pop save 1 index/CIDFont findresource dup/WidthsOnly known {dup/WidthsOnly get} {false} ifelse exch pop exch restore {pop} {exch pop true exit} ifelse } {pop} ifelse } forall {$str cvs $buildfontname} { false(*) { save exch dup/CIDFont findresource dup/WidthsOnly known {dup/WidthsOnly get not} {true} ifelse exch/CIDSystemInfo get dup/Registry get Registry eq exch/Ordering get Ordering eq and and {exch restore exch pop true exit} {pop restore} ifelse } $str/CIDFont resourceforall {$buildfontname} {$fontname $findfontByEnum} ifelse } ifelse }bind def end end currentdict/$error known currentdict/languagelevel known and dup {pop $error/SubstituteFont known} if dup {$error} 
{Adobe_CoolType_Core} ifelse begin { /SubstituteFont /CMap/Category resourcestatus { pop pop { $SubstituteFont begin /$substituteFound true def dup length $slen gt $sname null ne or $slen 0 gt and { $sname null eq {dup $str cvs dup length $slen sub $slen getinterval cvn} {$sname} ifelse Adobe_CoolType_Data/InVMFontsByCMap get 1 index 2 copy known { get false exch { pop currentglobal { GlobalFontDirectory 1 index known {exch pop true exit} {pop} ifelse } { FontDirectory 1 index known {exch pop true exit} { GlobalFontDirectory 1 index known {exch pop true exit} {pop} ifelse } ifelse } ifelse } forall } {pop pop false} ifelse { exch pop exch pop } { dup/CMap resourcestatus { pop pop dup/$cmapname exch def /CMap findresource/CIDSystemInfo get{def}forall $findfontByROS } { 128 string cvs dup(-)search { 3 1 roll search { 3 1 roll pop {dup cvi} stopped {pop pop pop pop pop $findfontByEnum} { 4 2 roll pop pop exch length exch 2 index length 2 index sub exch 1 sub -1 0 { $str cvs dup length 4 index 0 4 index 4 3 roll add getinterval exch 1 index exch 3 index exch putinterval dup/CMap resourcestatus { pop pop 4 1 roll pop pop pop dup/$cmapname exch def /CMap findresource/CIDSystemInfo get{def}forall $findfontByROS true exit } {pop} ifelse } for dup type/booleantype eq {pop} {pop pop pop $findfontByEnum} ifelse } ifelse } {pop pop pop $findfontByEnum} ifelse } {pop pop $findfontByEnum} ifelse } ifelse } ifelse } {//SubstituteFont exec} ifelse /$slen 0 def end } } { { $SubstituteFont begin /$substituteFound true def dup length $slen gt $sname null ne or $slen 0 gt and {$findfontByEnum} {//SubstituteFont exec} ifelse end } } ifelse bind readonly def Adobe_CoolType_Core/scfindfont/systemfindfont load put } { /scfindfont { $SubstituteFont begin dup systemfindfont dup/FontName known {dup/FontName get dup 3 index ne} {/noname true} ifelse dup { /$origfontnamefound 2 index def /$origfontname 4 index def/$substituteFound true def } if exch pop { $slen 0 gt $sname null ne 3 index 
length $slen gt or and { pop dup $findfontByEnum findfont dup maxlength 1 add dict begin {1 index/FID eq{pop pop}{def}ifelse} forall currentdict end definefont dup/FontName known{dup/FontName get}{null}ifelse $origfontnamefound ne { $origfontname $str cvs print ( substitution revised, using )print dup/FontName known {dup/FontName get}{(unspecified font)} ifelse $str cvs print(.\n)print } if } {exch pop} ifelse } {exch pop} ifelse end }bind def } ifelse end end Adobe_CoolType_Core_Defined not { Adobe_CoolType_Core/findfont { $SubstituteFont begin $depth 0 eq { /$fontname 1 index dup type/stringtype ne{$str cvs}if def /$substituteFound false def } if /$depth $depth 1 add def end scfindfont $SubstituteFont begin /$depth $depth 1 sub def $substituteFound $depth 0 eq and { $inVMIndex null ne {dup $inVMIndex $AddInVMFont} if $doSmartSub { currentdict/$Strategy known {$Strategy/$BuildFont get exec} if } if } if end }bind put } if } if end /$AddInVMFont { exch/FontName 2 copy known { get 1 dict dup begin exch 1 index gcheck def end exch Adobe_CoolType_Data/InVMFontsByCMap get exch $DictAdd } {pop pop pop} ifelse }bind def /$DictAdd { 2 copy known not {2 copy 4 index length dict put} if Level2? not { 2 copy get dup maxlength exch length 4 index length add lt 2 copy get dup length 4 index length add exch maxlength 1 index lt { 2 mul dict begin 2 copy get{forall}def 2 copy currentdict put end } {pop} ifelse } if get begin {def} forall end }bind def end end %%EndResource currentglobal true setglobal %%BeginResource: procset Adobe_CoolType_Utility_MAKEOCF 1.23 0 %%Copyright: Copyright 1987-2006 Adobe Systems Incorporated. %%Version: 1.23 0 systemdict/languagelevel known dup {currentglobal false setglobal} {false} ifelse exch userdict/Adobe_CoolType_Utility 2 copy known {2 copy get dup maxlength 27 add dict copy} {27 dict} ifelse put Adobe_CoolType_Utility begin /@eexecStartData def /@recognizeCIDFont null def /ct_Level2? exch def /ct_Clone? 
1183615869 internaldict dup /CCRun known not exch/eCCRun known not ct_Level2? and or def ct_Level2? {globaldict begin currentglobal true setglobal} if /ct_AddStdCIDMap ct_Level2? {{ mark Adobe_CoolType_Utility/@recognizeCIDFont currentdict put { ((Hex)57 StartData 0615 1e27 2c39 1c60 d8a8 cc31 fe2b f6e0 7aa3 e541 e21c 60d8 a8c9 c3d0 6d9e 1c60 d8a8 c9c2 02d7 9a1c 60d8 a849 1c60 d8a8 cc36 74f4 1144 b13b 77)0()/SubFileDecode filter cvx exec } stopped { cleartomark Adobe_CoolType_Utility/@recognizeCIDFont get countdictstack dup array dictstack exch 1 sub -1 0 { 2 copy get 3 index eq {1 index length exch sub 1 sub{end}repeat exit} {pop} ifelse } for pop pop Adobe_CoolType_Utility/@eexecStartData get eexec } {cleartomark} ifelse }} {{ Adobe_CoolType_Utility/@eexecStartData get eexec }} ifelse bind def userdict/cid_extensions known dup{cid_extensions/cid_UpdateDB known and}if { cid_extensions begin /cid_GetCIDSystemInfo { 1 index type/stringtype eq {exch cvn exch} if cid_extensions begin dup load 2 index known { 2 copy cid_GetStatusInfo dup null ne { 1 index load 3 index get dup null eq {pop pop cid_UpdateDB} { exch 1 index/Created get eq {exch pop exch pop} {pop cid_UpdateDB} ifelse } ifelse } {pop cid_UpdateDB} ifelse } {cid_UpdateDB} ifelse end }bind def end } if ct_Level2? {end setglobal} if /ct_UseNativeCapability? systemdict/composefont known def /ct_MakeOCF 35 dict def /ct_Vars 25 dict def /ct_GlyphDirProcs 6 dict def /ct_BuildCharDict 15 dict dup begin /charcode 2 string def /dst_string 1500 string def /nullstring()def /usewidths? 
true def end def ct_Level2?{setglobal}{pop}ifelse ct_GlyphDirProcs begin /GetGlyphDirectory { systemdict/languagelevel known {pop/CIDFont findresource/GlyphDirectory get} { 1 index/CIDFont findresource/GlyphDirectory get dup type/dicttype eq { dup dup maxlength exch length sub 2 index lt { dup length 2 index add dict copy 2 index /CIDFont findresource/GlyphDirectory 2 index put } if } if exch pop exch pop } ifelse + }def /+ { systemdict/languagelevel known { currentglobal false setglobal 3 dict begin /vm exch def } {1 dict begin} ifelse /$ exch def systemdict/languagelevel known { vm setglobal /gvm currentglobal def $ gcheck setglobal } if ?{$ begin}if }def /?{$ type/dicttype eq}def /|{ userdict/Adobe_CoolType_Data known { Adobe_CoolType_Data/AddWidths? known { currentdict Adobe_CoolType_Data begin begin AddWidths? { Adobe_CoolType_Data/CC 3 index put ?{def}{$ 3 1 roll put}ifelse CC charcode exch 1 index 0 2 index 256 idiv put 1 index exch 1 exch 256 mod put stringwidth 2 array astore currentfont/Widths get exch CC exch put } {?{def}{$ 3 1 roll put}ifelse} ifelse end end } {?{def}{$ 3 1 roll put}ifelse} ifelse } {?{def}{$ 3 1 roll put}ifelse} ifelse }def /! 
{ ?{end}if systemdict/languagelevel known {gvm setglobal} if end }def /:{string currentfile exch readstring pop}executeonly def end ct_MakeOCF begin /ct_cHexEncoding [/c00/c01/c02/c03/c04/c05/c06/c07/c08/c09/c0A/c0B/c0C/c0D/c0E/c0F/c10/c11/c12 /c13/c14/c15/c16/c17/c18/c19/c1A/c1B/c1C/c1D/c1E/c1F/c20/c21/c22/c23/c24/c25 /c26/c27/c28/c29/c2A/c2B/c2C/c2D/c2E/c2F/c30/c31/c32/c33/c34/c35/c36/c37/c38 /c39/c3A/c3B/c3C/c3D/c3E/c3F/c40/c41/c42/c43/c44/c45/c46/c47/c48/c49/c4A/c4B /c4C/c4D/c4E/c4F/c50/c51/c52/c53/c54/c55/c56/c57/c58/c59/c5A/c5B/c5C/c5D/c5E /c5F/c60/c61/c62/c63/c64/c65/c66/c67/c68/c69/c6A/c6B/c6C/c6D/c6E/c6F/c70/c71 /c72/c73/c74/c75/c76/c77/c78/c79/c7A/c7B/c7C/c7D/c7E/c7F/c80/c81/c82/c83/c84 /c85/c86/c87/c88/c89/c8A/c8B/c8C/c8D/c8E/c8F/c90/c91/c92/c93/c94/c95/c96/c97 /c98/c99/c9A/c9B/c9C/c9D/c9E/c9F/cA0/cA1/cA2/cA3/cA4/cA5/cA6/cA7/cA8/cA9/cAA /cAB/cAC/cAD/cAE/cAF/cB0/cB1/cB2/cB3/cB4/cB5/cB6/cB7/cB8/cB9/cBA/cBB/cBC/cBD /cBE/cBF/cC0/cC1/cC2/cC3/cC4/cC5/cC6/cC7/cC8/cC9/cCA/cCB/cCC/cCD/cCE/cCF/cD0 /cD1/cD2/cD3/cD4/cD5/cD6/cD7/cD8/cD9/cDA/cDB/cDC/cDD/cDE/cDF/cE0/cE1/cE2/cE3 /cE4/cE5/cE6/cE7/cE8/cE9/cEA/cEB/cEC/cED/cEE/cEF/cF0/cF1/cF2/cF3/cF4/cF5/cF6 /cF7/cF8/cF9/cFA/cFB/cFC/cFD/cFE/cFF]def /ct_CID_STR_SIZE 8000 def /ct_mkocfStr100 100 string def /ct_defaultFontMtx[.001 0 0 .001 0 0]def /ct_1000Mtx[1000 0 0 1000 0 0]def /ct_raise{exch cvx exch errordict exch get exec stop}bind def /ct_reraise {cvx $error/errorname get(Error: )print dup( )cvs print errordict exch get exec stop }bind def /ct_cvnsi { 1 index add 1 sub 1 exch 0 4 1 roll { 2 index exch get exch 8 bitshift add } for exch pop }bind def /ct_GetInterval { Adobe_CoolType_Utility/ct_BuildCharDict get begin /dst_index 0 def dup dst_string length gt {dup string/dst_string exch def} if 1 index ct_CID_STR_SIZE idiv /arrayIndex exch def 2 index arrayIndex get 2 index arrayIndex ct_CID_STR_SIZE mul sub { dup 3 index add 2 index length le { 2 index getinterval dst_string dst_index 2 index putinterval length dst_index 
add/dst_index exch def exit } { 1 index length 1 index sub dup 4 1 roll getinterval dst_string dst_index 2 index putinterval pop dup dst_index add/dst_index exch def sub /arrayIndex arrayIndex 1 add def 2 index dup length arrayIndex gt {arrayIndex get} { pop exit } ifelse 0 } ifelse } loop pop pop pop dst_string 0 dst_index getinterval end }bind def ct_Level2? { /ct_resourcestatus currentglobal mark true setglobal {/unknowninstancename/Category resourcestatus} stopped {cleartomark setglobal true} {cleartomark currentglobal not exch setglobal} ifelse { { mark 3 1 roll/Category findresource begin ct_Vars/vm currentglobal put ({ResourceStatus}stopped)0()/SubFileDecode filter cvx exec {cleartomark false} {{3 2 roll pop true}{cleartomark false}ifelse} ifelse ct_Vars/vm get setglobal end } } {{resourcestatus}} ifelse bind def /CIDFont/Category ct_resourcestatus {pop pop} { currentglobal true setglobal /Generic/Category findresource dup length dict copy dup/InstanceType/dicttype put /CIDFont exch/Category defineresource pop setglobal } ifelse ct_UseNativeCapability? 
{ /CIDInit/ProcSet findresource begin 12 dict begin begincmap /CIDSystemInfo 3 dict dup begin /Registry(Adobe)def /Ordering(Identity)def /Supplement 0 def end def /CMapName/Identity-H def /CMapVersion 1.000 def /CMapType 1 def 1 begincodespacerange <0000> endcodespacerange 1 begincidrange <0000>0 endcidrange endcmap CMapName currentdict/CMap defineresource pop end end } if } { /ct_Category 2 dict begin /CIDFont 10 dict def /ProcSet 2 dict def currentdict end def /defineresource { ct_Category 1 index 2 copy known { get dup dup maxlength exch length eq { dup length 10 add dict copy ct_Category 2 index 2 index put } if 3 index 3 index put pop exch pop } {pop pop/defineresource/undefined ct_raise} ifelse }bind def /findresource { ct_Category 1 index 2 copy known { get 2 index 2 copy known {get 3 1 roll pop pop} {pop pop/findresource/undefinedresource ct_raise} ifelse } {pop pop/findresource/undefined ct_raise} ifelse }bind def /resourcestatus { ct_Category 1 index 2 copy known { get 2 index known exch pop exch pop { 0 -1 true } { false } ifelse } {pop pop/findresource/undefined ct_raise} ifelse }bind def /ct_resourcestatus/resourcestatus load def } ifelse /ct_CIDInit 2 dict begin /ct_cidfont_stream_init { { dup(Binary)eq { pop null currentfile ct_Level2? { {cid_BYTE_COUNT()/SubFileDecode filter} stopped {pop pop pop} if } if /readstring load exit } if dup(Hex)eq { pop currentfile ct_Level2? 
{ {null exch/ASCIIHexDecode filter/readstring} stopped {pop exch pop(>)exch/readhexstring} if } {(>)exch/readhexstring} ifelse load exit } if /StartData/typecheck ct_raise } loop cid_BYTE_COUNT ct_CID_STR_SIZE le { 2 copy cid_BYTE_COUNT string exch exec pop 1 array dup 3 -1 roll 0 exch put } { cid_BYTE_COUNT ct_CID_STR_SIZE div ceiling cvi dup array exch 2 sub 0 exch 1 exch { 2 copy 5 index ct_CID_STR_SIZE string 6 index exec pop put pop } for 2 index cid_BYTE_COUNT ct_CID_STR_SIZE mod string 3 index exec pop 1 index exch 1 index length 1 sub exch put } ifelse cid_CIDFONT exch/GlyphData exch put 2 index null eq { pop pop pop } { pop/readstring load 1 string exch { 3 copy exec pop dup length 0 eq { pop pop pop pop pop true exit } if 4 index eq { pop pop pop pop false exit } if } loop pop } ifelse }bind def /StartData { mark { currentdict dup/FDArray get 0 get/FontMatrix get 0 get 0.001 eq { dup/CDevProc known not { /CDevProc 1183615869 internaldict/stdCDevProc 2 copy known {get} { pop pop {pop pop pop pop pop 0 -1000 7 index 2 div 880} } ifelse def } if } { /CDevProc { pop pop pop pop pop 0 1 cid_temp/cid_CIDFONT get /FDArray get 0 get /FontMatrix get 0 get div 7 index 2 div 1 index 0.88 mul }def } ifelse /cid_temp 15 dict def cid_temp begin /cid_CIDFONT exch def 3 copy pop dup/cid_BYTE_COUNT exch def 0 gt { ct_cidfont_stream_init FDArray { /Private get dup/SubrMapOffset known { begin /Subrs SubrCount array def Subrs SubrMapOffset SubrCount SDBytes ct_Level2? 
{ currentdict dup/SubrMapOffset undef dup/SubrCount undef /SDBytes undef } if end /cid_SD_BYTES exch def /cid_SUBR_COUNT exch def /cid_SUBR_MAP_OFFSET exch def /cid_SUBRS exch def cid_SUBR_COUNT 0 gt { GlyphData cid_SUBR_MAP_OFFSET cid_SD_BYTES ct_GetInterval 0 cid_SD_BYTES ct_cvnsi 0 1 cid_SUBR_COUNT 1 sub { exch 1 index 1 add cid_SD_BYTES mul cid_SUBR_MAP_OFFSET add GlyphData exch cid_SD_BYTES ct_GetInterval 0 cid_SD_BYTES ct_cvnsi cid_SUBRS 4 2 roll GlyphData exch 4 index 1 index sub ct_GetInterval dup length string copy put } for pop } if } {pop} ifelse } forall } if cleartomark pop pop end CIDFontName currentdict/CIDFont defineresource pop end end } stopped {cleartomark/StartData ct_reraise} if }bind def currentdict end def /ct_saveCIDInit { /CIDInit/ProcSet ct_resourcestatus {true} {/CIDInitC/ProcSet ct_resourcestatus} ifelse { pop pop /CIDInit/ProcSet findresource ct_UseNativeCapability? {pop null} {/CIDInit ct_CIDInit/ProcSet defineresource pop} ifelse } {/CIDInit ct_CIDInit/ProcSet defineresource pop null} ifelse ct_Vars exch/ct_oldCIDInit exch put }bind def /ct_restoreCIDInit { ct_Vars/ct_oldCIDInit get dup null ne {/CIDInit exch/ProcSet defineresource pop} {pop} ifelse }bind def /ct_BuildCharSetUp { 1 index begin CIDFont begin Adobe_CoolType_Utility/ct_BuildCharDict get begin /ct_dfCharCode exch def /ct_dfDict exch def CIDFirstByte ct_dfCharCode add dup CIDCount ge {pop 0} if /cid exch def { GlyphDirectory cid 2 copy known {get} {pop pop nullstring} ifelse dup length FDBytes sub 0 gt { dup FDBytes 0 ne {0 FDBytes ct_cvnsi} {pop 0} ifelse /fdIndex exch def dup length FDBytes sub FDBytes exch getinterval /charstring exch def exit } { pop cid 0 eq {/charstring nullstring def exit} if /cid 0 def } ifelse } loop }def /ct_SetCacheDevice { 0 0 moveto dup stringwidth 3 -1 roll true charpath pathbbox 0 -1000 7 index 2 div 880 setcachedevice2 0 0 moveto }def /ct_CloneSetCacheProc { 1 eq { stringwidth pop -2 div -880 0 -1000 setcharwidth moveto } { usewidths? 
{ currentfont/Widths get cid 2 copy known {get exch pop aload pop} {pop pop stringwidth} ifelse } {stringwidth} ifelse setcharwidth 0 0 moveto } ifelse }def /ct_Type3ShowCharString { ct_FDDict fdIndex 2 copy known {get} { currentglobal 3 1 roll 1 index gcheck setglobal ct_Type1FontTemplate dup maxlength dict copy begin FDArray fdIndex get dup/FontMatrix 2 copy known {get} {pop pop ct_defaultFontMtx} ifelse /FontMatrix exch dup length array copy def /Private get /Private exch def /Widths rootfont/Widths get def /CharStrings 1 dict dup/.notdef dup length string copy put def currentdict end /ct_Type1Font exch definefont dup 5 1 roll put setglobal } ifelse dup/CharStrings get 1 index/Encoding get ct_dfCharCode get charstring put rootfont/WMode 2 copy known {get} {pop pop 0} ifelse exch 1000 scalefont setfont ct_str1 0 ct_dfCharCode put ct_str1 exch ct_dfSetCacheProc ct_SyntheticBold { currentpoint ct_str1 show newpath moveto ct_str1 true charpath ct_StrokeWidth setlinewidth stroke } {ct_str1 show} ifelse }def /ct_Type4ShowCharString { ct_dfDict ct_dfCharCode charstring FDArray fdIndex get dup/FontMatrix get dup ct_defaultFontMtx ct_matrixeq not {ct_1000Mtx matrix concatmatrix concat} {pop} ifelse /Private get Adobe_CoolType_Utility/ct_Level2? get not { ct_dfDict/Private 3 -1 roll {put} 1183615869 internaldict/superexec get exec } if 1183615869 internaldict Adobe_CoolType_Utility/ct_Level2? get {1 index} {3 index/Private get mark 6 1 roll} ifelse dup/RunInt known {/RunInt get} {pop/CCRun} ifelse get exec Adobe_CoolType_Utility/ct_Level2? 
get not {cleartomark} if }bind def /ct_BuildCharIncremental { { Adobe_CoolType_Utility/ct_MakeOCF get begin ct_BuildCharSetUp ct_ShowCharString } stopped {stop} if end end end end }bind def /BaseFontNameStr(BF00)def /ct_Type1FontTemplate 14 dict begin /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0]def /FontBBox [-250 -250 1250 1250]def /Encoding ct_cHexEncoding def /PaintType 0 def currentdict end def /BaseFontTemplate 11 dict begin /FontMatrix [0.001 0 0 0.001 0 0]def /FontBBox [-250 -250 1250 1250]def /Encoding ct_cHexEncoding def /BuildChar/ct_BuildCharIncremental load def ct_Clone? { /FontType 3 def /ct_ShowCharString/ct_Type3ShowCharString load def /ct_dfSetCacheProc/ct_CloneSetCacheProc load def /ct_SyntheticBold false def /ct_StrokeWidth 1 def } { /FontType 4 def /Private 1 dict dup/lenIV 4 put def /CharStrings 1 dict dup/.notdefput def /PaintType 0 def /ct_ShowCharString/ct_Type4ShowCharString load def } ifelse /ct_str1 1 string def currentdict end def /BaseFontDictSize BaseFontTemplate length 5 add def /ct_matrixeq { true 0 1 5 { dup 4 index exch get exch 3 index exch get eq and dup not {exit} if } for exch pop exch pop }bind def /ct_makeocf { 15 dict begin exch/WMode exch def exch/FontName exch def /FontType 0 def /FMapType 2 def dup/FontMatrix known {dup/FontMatrix get/FontMatrix exch def} {/FontMatrix matrix def} ifelse /bfCount 1 index/CIDCount get 256 idiv 1 add dup 256 gt{pop 256}if def /Encoding 256 array 0 1 bfCount 1 sub{2 copy dup put pop}for bfCount 1 255{2 copy bfCount put pop}for def /FDepVector bfCount dup 256 lt{1 add}if array def BaseFontTemplate BaseFontDictSize dict copy begin /CIDFont exch def CIDFont/FontBBox known {CIDFont/FontBBox get/FontBBox exch def} if CIDFont/CDevProc known {CIDFont/CDevProc get/CDevProc exch def} if currentdict end BaseFontNameStr 3(0)putinterval 0 1 bfCount dup 256 eq{1 sub}if { FDepVector exch 2 index BaseFontDictSize dict copy begin dup/CIDFirstByte exch 256 mul def FontType 3 eq {/ct_FDDict 2 dict def} if 
currentdict end 1 index 16 BaseFontNameStr 2 2 getinterval cvrs pop BaseFontNameStr exch definefont put } for ct_Clone? {/Widths 1 index/CIDFont get/GlyphDirectory get length dict def} if FontName currentdict end definefont ct_Clone? { gsave dup 1000 scalefont setfont ct_BuildCharDict begin /usewidths? false def currentfont/Widths get begin exch/CIDFont get/GlyphDirectory get { pop dup charcode exch 1 index 0 2 index 256 idiv put 1 index exch 1 exch 256 mod put stringwidth 2 array astore def } forall end /usewidths? true def end grestore } {exch pop} ifelse }bind def currentglobal true setglobal /ct_ComposeFont { ct_UseNativeCapability? { 2 index/CMap ct_resourcestatus {pop pop exch pop} { /CIDInit/ProcSet findresource begin 12 dict begin begincmap /CMapName 3 index def /CMapVersion 1.000 def /CMapType 1 def exch/WMode exch def /CIDSystemInfo 3 dict dup begin /Registry(Adobe)def /Ordering CMapName ct_mkocfStr100 cvs (Adobe-)search { pop pop (-)search { dup length string copy exch pop exch pop } {pop(Identity)} ifelse } {pop (Identity)} ifelse def /Supplement 0 def end def 1 begincodespacerange <0000> endcodespacerange 1 begincidrange <0000>0 endcidrange endcmap CMapName currentdict/CMap defineresource pop end end } ifelse composefont } { 3 2 roll pop 0 get/CIDFont findresource ct_makeocf } ifelse }bind def setglobal /ct_MakeIdentity { ct_UseNativeCapability? 
{ 1 index/CMap ct_resourcestatus {pop pop} { /CIDInit/ProcSet findresource begin 12 dict begin begincmap /CMapName 2 index def /CMapVersion 1.000 def /CMapType 1 def /CIDSystemInfo 3 dict dup begin /Registry(Adobe)def /Ordering CMapName ct_mkocfStr100 cvs (Adobe-)search { pop pop (-)search {dup length string copy exch pop exch pop} {pop(Identity)} ifelse } {pop(Identity)} ifelse def /Supplement 0 def end def 1 begincodespacerange <0000> endcodespacerange 1 begincidrange <0000>0 endcidrange endcmap CMapName currentdict/CMap defineresource pop end end } ifelse composefont } { exch pop 0 get/CIDFont findresource ct_makeocf } ifelse }bind def currentdict readonly pop end end %%EndResource setglobal %%BeginResource: procset Adobe_CoolType_Utility_T42 1.0 0 %%Copyright: Copyright 1987-2004 Adobe Systems Incorporated. %%Version: 1.0 0 userdict/ct_T42Dict 15 dict put ct_T42Dict begin /Is2015? { version cvi 2015 ge }bind def /AllocGlyphStorage { Is2015? { pop } { {string}forall }ifelse }bind def /Type42DictBegin { 25 dict begin /FontName exch def /CharStrings 256 dict begin /.notdef 0 def currentdict end def /Encoding exch def /PaintType 0 def /FontType 42 def /FontMatrix[1 0 0 1 0 0]def 4 array astore cvx/FontBBox exch def /sfnts }bind def /Type42DictEnd { currentdict dup/FontName get exch definefont end ct_T42Dict exch dup/FontName get exch put }bind def /RD{string currentfile exch readstring pop}executeonly def /PrepFor2015 { Is2015? { /GlyphDirectory 16 dict def sfnts 0 get dup 2 index (glyx) putinterval 2 index (locx) putinterval pop pop } { pop pop }ifelse }bind def /AddT42Char { Is2015? { /GlyphDirectory get begin def end pop pop } { /sfnts get 4 index get 3 index 2 index putinterval pop pop pop pop }ifelse }bind def /T0AddT42Mtx2 { /CIDFont findresource/Metrics2 get begin def end }bind def end %%EndResource currentglobal true setglobal %%BeginFile: MMFauxFont.prc %%Copyright: Copyright 1987-2001 Adobe Systems Incorporated. %%All Rights Reserved. 
% ---------------------------------------------------------------------------
% MMFauxFont.prc: helpers that add one extra glyph procedure to a copy of an
% existing font.  Everything is defined inside ct_EuroDict, a 10-entry
% dictionary stored in userdict.
%
% NOTE: `%` comments extend to end of line, so every DSC comment (%%...) in
% this section must stay on a line of its own; otherwise the code that
% follows it on the same line is silently ignored by the interpreter.
% ---------------------------------------------------------------------------
userdict /ct_EuroDict 10 dict put
ct_EuroDict begin

% dict ct_CopyFont -
% Defines every key/value pair of <dict> in the current dictionary, except
% the /FID entry, which is discarded.
/ct_CopyFont
{
    { 1 index /FID ne { def } { pop pop } ifelse } forall
} def

% fontname glyphname ct_GetGlyphOutline
%       mark wx wy llx lly urx ury mark <path tokens...> count
% Builds a temporary copy of <fontname> (registered as /ct_EuroFont) whose
% Encoding slot 0 is <glyphname>, selects it at 1000 units, and leaves on the
% operand stack: the width of character code 0, the bounding box of its
% outline, and the outline itself as coordinates followed by the executable
% names m / l / c / cp.  <count> is the number of path tokens plus 8.
% Both marks are left open on purpose; ct_MakeGlyphProc closes them.
/ct_GetGlyphOutline
{
    gsave
    initmatrix newpath
    exch findfont dup length 1 add dict
    begin
        ct_CopyFont
        % Private copy of the encoding vector with the glyph name in slot 0.
        /Encoding Encoding dup length array copy
        dup 4 -1 roll 0 exch put
        def
        currentdict
    end
    /ct_EuroFont exch definefont
    1000 scalefont setfont
    0 0 moveto
    [
        <00> stringwidth
        <00> false charpath
        pathbbox
        [
            {/m cvx} {/l cvx} {/c cvx} {/cp cvx} pathforall
    grestore
    counttomark 8 add
} def

% mark wx wy llx lly urx ury mark <path tokens...> ct_MakeGlyphProc proc
% Closes the two marks opened by ct_GetGlyphOutline, producing
%   { wx wy llx lly urx ury { <path tokens> } ct_PSBuildGlyph }
/ct_MakeGlyphProc
{
    ] cvx
    /ct_PSBuildGlyph cvx
    ] cvx
} def

% any wx wy llx lly urx ury pathproc ct_PSBuildGlyph -
% Paints one glyph.  The operand lying beneath the seven glyph operands is
% discarded.  Width and bounding box are mapped through ct_FontMatrix and
% passed to setcachedevice; <pathproc> is then executed with ct_PSPathOps on
% the dictionary stack.  When the current dictionary has PaintType 2 the
% bounding box is first widened by half of StrokeWidth and the path is
% stroked; otherwise the path is filled.
/ct_PSBuildGlyph
{
    gsave
    8 -1 roll pop
    7 1 roll
    % Transform (wx,wy), then (llx,lly), then (urx,ury).
    6 -2 roll ct_FontMatrix transform 6 2 roll
    4 -2 roll ct_FontMatrix transform 4 2 roll
    ct_FontMatrix transform
    % Boolean "stroked font"; one copy is tucked beneath the operands for the
    % final stroke/fill decision, the other drives the bbox adjustment.
    currentdict /PaintType 2 copy known { get 2 eq } { pop pop false } ifelse
    dup 9 1 roll
    {
        currentdict /StrokeWidth 2 copy known
        {
            get 2 div 0 ct_FontMatrix dtransform pop
            5 1 roll
            4 -1 roll 4 index sub 4 1 roll
            3 -1 roll 4 index sub 3 1 roll
            exch 4 index add exch
            4 index add
            5 -1 roll pop
        }
        {
            pop pop
        } ifelse
    } if
    setcachedevice
    ct_FontMatrix concat
    ct_PSPathOps begin
        exec
    end
    {
        currentdict /StrokeWidth 2 copy known
        { get }
        { pop pop 0 } ifelse
        setlinewidth stroke
    }
    {
        fill
    } ifelse
    grestore
} def

% Short path-operator names used inside the recorded glyph outlines.
/ct_PSPathOps 4 dict dup begin
    /m  { moveto } def
    /l  { lineto } def
    /c  { curveto } def
    /cp { closepath } def
end def

/ct_matrix1000 [1000 0 0 1000 0 0] def

% fontname glyphname proc ct_AddGlyphProc fontname glyphname proc
% Re-registers <fontname> (via definefont) as a copy of itself whose
% CharStrings dictionary additionally maps <glyphname> to <proc>.  The copy
% also receives ct_FontMatrix (ct_matrix1000 concatenated with the font's
% FontMatrix), ct_PSBuildGlyph and ct_PSPathOps, which the glyph procedure
% reads at paint time.  The three operands are left on the stack; only the
% result of definefont is popped.
/ct_AddGlyphProc
{
    2 index findfont dup length 4 add dict
    begin
        ct_CopyFont
        /CharStrings CharStrings dup length 1 add dict copy
        begin
            3 1 roll def
            currentdict
        end
        def
        /ct_FontMatrix ct_matrix1000 FontMatrix matrix concatmatrix def
        /ct_PSBuildGlyph /ct_PSBuildGlyph load def
        /ct_PSPathOps /ct_PSPathOps load def
        currentdict
    end
    definefont pop
} def

% save fontname glyphname ct_AddGlyphToPrinterFont ...
% Two variants, selected by whether systemdict knows /languagelevel:
%  - known:   capture the glyph outline from the font itself, roll up the
%             operand beneath the two names and hand it to `restore`, then
%             install the captured outline as a glyph procedure.
%  - unknown: drop both names, `restore`, and install as /Euro in the font
%             named by Adobe_CTFauxDict /$$$FONTNAME the ct_EuroDict entry
%             selected by Adobe_CTFauxDict /$$$SUBSTITUTEBASE.
systemdict /languagelevel known
{
    /ct_AddGlyphToPrinterFont
    {
        2 copy
        ct_GetGlyphOutline
        3 add -1 roll
        restore
        ct_MakeGlyphProc
        ct_AddGlyphProc
    } def
}
{
    /ct_AddGlyphToPrinterFont
    {
        pop pop
        restore
        Adobe_CTFauxDict /$$$FONTNAME get
        /Euro
        Adobe_CTFauxDict /$$$SUBSTITUTEBASE get
        ct_EuroDict exch get
        ct_AddGlyphProc
    } def
} ifelse

% Pre-recorded glyph procedure in ct_PSBuildGlyph operand order:
% wx wy llx lly urx ury { path }.
/AdobeSansMM
{
    556 0 24 -19 541 703
    {
        541 628 m
        510 669 442 703 354 703 c
        201 703 117 607 101 444 c
        50 444 l
        25 372 l
        97 372 l
        97 301 l
        49 301 l
        24 229 l
        103 229 l
        124 67 209 -19 350 -19 c
        435 -19 501 25 509 32 c
        509 131 l
        492 105 417 60 343 60 c
        267 60 204 127 197 229 c
        406 229 l
        430 301 l
        191 301 l
        191 372 l
        455 372 l
        479 444 l
        194 444 l
        201 531 245 624 348 624 c
        433 624 484 583 509 534 c
        cp
        556 0 m
    }
    ct_PSBuildGlyph
} def

% Pre-recorded glyph procedure in ct_PSBuildGlyph operand order:
% wx wy llx lly urx ury { path }.
/AdobeSerifMM
{
    500 0 10 -12 484 692
    {
        347 298 m
        171 298 l
        170 310 170 322 170 335 c
        170 362 l
        362 362 l
        374 403 l
        172 403 l
        184 580 244 642 308 642 c
        380 642 434 574 457 457 c
        481 462 l
        474 691 l
        449 691 l
        433 670 429 657 410 657 c
        394 657 360 692 299 692 c
        204 692 94 604 73 403 c
        22 403 l
        10 362 l
        70 362 l
        69 352 69 341 69 330 c
        69 319 69 308 70 298 c
        22 298 l
        10 257 l
        73 257 l
        97 57 216 -12 295 -12 c
        364 -12 427 25 484 123 c
        458 142 l
        425 101 384 37 316 37 c
        256 37 189 84 173 257 c
        335 257 l
        cp
        500 0 m
    }
    ct_PSBuildGlyph
} def

end
%%EndFile
% Restore the VM allocation mode saved by the `currentglobal true setglobal`
% that precedes %%BeginFile.
setglobal
Adobe_CoolType_Core begin /$Oblique SetSubstituteStrategy end
%%BeginResource: procset Adobe_AGM_Image 1.0 0
%%Version: 1.0 0
%%Copyright: Copyright(C)2000-2006 Adobe Systems, Inc. All Rights Reserved.
systemdict/setpacking known { currentpacking true setpacking }if userdict/Adobe_AGM_Image 71 dict dup begin put /Adobe_AGM_Image_Id/Adobe_AGM_Image_1.0_0 def /nd{ null def }bind def /AGMIMG_&image nd /AGMIMG_&colorimage nd /AGMIMG_&imagemask nd /AGMIMG_mbuf()def /AGMIMG_ybuf()def /AGMIMG_kbuf()def /AGMIMG_c 0 def /AGMIMG_m 0 def /AGMIMG_y 0 def /AGMIMG_k 0 def /AGMIMG_tmp nd /AGMIMG_imagestring0 nd /AGMIMG_imagestring1 nd /AGMIMG_imagestring2 nd /AGMIMG_imagestring3 nd /AGMIMG_imagestring4 nd /AGMIMG_imagestring5 nd /AGMIMG_cnt nd /AGMIMG_fsave nd /AGMIMG_colorAry nd /AGMIMG_override nd /AGMIMG_name nd /AGMIMG_maskSource nd /AGMIMG_flushfilters nd /invert_image_samples nd /knockout_image_samples nd /img nd /sepimg nd /devnimg nd /idximg nd /ds { Adobe_AGM_Core begin Adobe_AGM_Image begin /AGMIMG_&image systemdict/image get def /AGMIMG_&imagemask systemdict/imagemask get def /colorimage where{ pop /AGMIMG_&colorimage/colorimage ldf }if end end }def /ps { Adobe_AGM_Image begin /AGMIMG_ccimage_exists{/customcolorimage where { pop /Adobe_AGM_OnHost_Seps where { pop false }{ /Adobe_AGM_InRip_Seps where { pop false }{ true }ifelse }ifelse }{ false }ifelse }bdf level2{ /invert_image_samples { Adobe_AGM_Image/AGMIMG_tmp Decode length ddf /Decode[Decode 1 get Decode 0 get]def }def /knockout_image_samples { Operator/imagemask ne{ /Decode[1 1]def }if }def }{ /invert_image_samples { {1 exch sub}currenttransfer addprocs settransfer }def /knockout_image_samples { {pop 1}currenttransfer addprocs settransfer }def }ifelse /img/imageormask ldf /sepimg/sep_imageormask ldf /devnimg/devn_imageormask ldf /idximg/indexed_imageormask ldf /_ctype 7 def currentdict{ dup xcheck 1 index type dup/arraytype eq exch/packedarraytype eq or and{ bind }if def }forall }def /pt { end }def /dt { }def /AGMIMG_flushfilters { dup type/arraytype ne {1 array astore}if dup 0 get currentfile ne {dup 0 get flushfile}if { dup type/filetype eq { dup status 1 index currentfile ne and {closefile} {pop} ifelse 
}{pop}ifelse }forall }def /AGMIMG_init_common { currentdict/T known{/ImageType/T ldf currentdict/T undef}if currentdict/W known{/Width/W ldf currentdict/W undef}if currentdict/H known{/Height/H ldf currentdict/H undef}if currentdict/M known{/ImageMatrix/M ldf currentdict/M undef}if currentdict/BC known{/BitsPerComponent/BC ldf currentdict/BC undef}if currentdict/D known{/Decode/D ldf currentdict/D undef}if currentdict/DS known{/DataSource/DS ldf currentdict/DS undef}if currentdict/O known{ /Operator/O load 1 eq{ /imagemask }{ /O load 2 eq{ /image }{ /colorimage }ifelse }ifelse def currentdict/O undef }if currentdict/HSCI known{/HostSepColorImage/HSCI ldf currentdict/HSCI undef}if currentdict/MD known{/MultipleDataSources/MD ldf currentdict/MD undef}if currentdict/I known{/Interpolate/I ldf currentdict/I undef}if currentdict/SI known{/SkipImageProc/SI ldf currentdict/SI undef}if /DataSource load xcheck not{ DataSource type/arraytype eq{ DataSource 0 get type/filetype eq{ /_Filters DataSource def currentdict/MultipleDataSources known not{ /DataSource DataSource dup length 1 sub get def }if }if }if currentdict/MultipleDataSources known not{ /MultipleDataSources DataSource type/arraytype eq{ DataSource length 1 gt } {false}ifelse def }if }if /NComponents Decode length 2 div def currentdict/SkipImageProc known not{/SkipImageProc{false}def}if }bdf /imageormask_sys { begin AGMIMG_init_common save mark level2{ currentdict Operator/imagemask eq{ AGMIMG_&imagemask }{ use_mask{ process_mask AGMIMG_&image }{ AGMIMG_&image }ifelse }ifelse }{ Width Height Operator/imagemask eq{ Decode 0 get 1 eq Decode 1 get 0 eq and ImageMatrix/DataSource load AGMIMG_&imagemask }{ BitsPerComponent ImageMatrix/DataSource load AGMIMG_&image }ifelse }ifelse currentdict/_Filters known{_Filters AGMIMG_flushfilters}if cleartomark restore end }def /overprint_plate { currentoverprint{ 0 get dup type/nametype eq{ dup/DeviceGray eq{ pop AGMCORE_black_plate not }{ /DeviceCMYK eq{ AGMCORE_is_cmyk_sep not 
}if }ifelse }{ false exch { AGMOHS_sepink eq or }forall not }ifelse }{ pop false }ifelse }def /process_mask { level3{ dup begin /ImageType 1 def end 4 dict begin /DataDict exch def /ImageType 3 def /InterleaveType 3 def /MaskDict 9 dict begin /ImageType 1 def /Width DataDict dup/MaskWidth known{/MaskWidth}{/Width}ifelse get def /Height DataDict dup/MaskHeight known{/MaskHeight}{/Height}ifelse get def /ImageMatrix[Width 0 0 Height neg 0 Height]def /NComponents 1 def /BitsPerComponent 1 def /Decode DataDict dup/MaskD known{/MaskD}{[1 0]}ifelse get def /DataSource Adobe_AGM_Core/AGMIMG_maskSource get def currentdict end def currentdict end }if }def /use_mask { dup/Mask known {dup/Mask get}{false}ifelse }def /imageormask { begin AGMIMG_init_common SkipImageProc{ currentdict consumeimagedata } { save mark level2 AGMCORE_host_sep not and{ currentdict Operator/imagemask eq DeviceN_PS2 not and{ imagemask }{ AGMCORE_in_rip_sep currentoverprint and currentcolorspace 0 get/DeviceGray eq and{ [/Separation/Black/DeviceGray{}]setcolorspace /Decode[Decode 1 get Decode 0 get]def }if use_mask{ process_mask image }{ DeviceN_NoneName DeviceN_PS2 Indexed_DeviceN level3 not and or or AGMCORE_in_rip_sep and { Names convert_to_process not{ 2 dict begin /imageDict xdf /names_index 0 def gsave imageDict write_image_file{ Names{ dup(None)ne{ [/Separation 3 -1 roll/DeviceGray{1 exch sub}]setcolorspace Operator imageDict read_image_file names_index 0 eq{true setoverprint}if /names_index names_index 1 add def }{ pop }ifelse }forall close_image_file }if grestore end }{ Operator/imagemask eq{ imagemask }{ image }ifelse }ifelse }{ Operator/imagemask eq{ imagemask }{ image }ifelse }ifelse }ifelse }ifelse }{ Width Height Operator/imagemask eq{ Decode 0 get 1 eq Decode 1 get 0 eq and ImageMatrix/DataSource load /Adobe_AGM_OnHost_Seps where{ pop imagemask }{ currentgray 1 ne{ currentdict imageormask_sys }{ currentoverprint not{ 1 AGMCORE_&setgray currentdict imageormask_sys }{ currentdict 
ignoreimagedata }ifelse }ifelse }ifelse }{ BitsPerComponent ImageMatrix MultipleDataSources{ 0 1 NComponents 1 sub{ DataSource exch get }for }{ /DataSource load }ifelse Operator/colorimage eq{ AGMCORE_host_sep{ MultipleDataSources level2 or NComponents 4 eq and{ AGMCORE_is_cmyk_sep{ MultipleDataSources{ /DataSource DataSource 0 get xcheck { [ DataSource 0 get/exec cvx DataSource 1 get/exec cvx DataSource 2 get/exec cvx DataSource 3 get/exec cvx /AGMCORE_get_ink_data cvx ]cvx }{ DataSource aload pop AGMCORE_get_ink_data }ifelse def }{ /DataSource Width BitsPerComponent mul 7 add 8 idiv Height mul 4 mul /DataSource load filter_cmyk 0()/SubFileDecode filter def }ifelse /Decode[Decode 0 get Decode 1 get]def /MultipleDataSources false def /NComponents 1 def /Operator/image def invert_image_samples 1 AGMCORE_&setgray currentdict imageormask_sys }{ currentoverprint not Operator/imagemask eq and{ 1 AGMCORE_&setgray currentdict imageormask_sys }{ currentdict ignoreimagedata }ifelse }ifelse }{ MultipleDataSources NComponents AGMIMG_&colorimage }ifelse }{ true NComponents colorimage }ifelse }{ Operator/image eq{ AGMCORE_host_sep{ /DoImage true def currentdict/HostSepColorImage known{HostSepColorImage not}{false}ifelse { AGMCORE_black_plate not Operator/imagemask ne and{ /DoImage false def currentdict ignoreimagedata }if }if 1 AGMCORE_&setgray DoImage {currentdict imageormask_sys}if }{ use_mask{ process_mask image }{ image }ifelse }ifelse }{ Operator/knockout eq{ pop pop pop pop pop currentcolorspace overprint_plate not{ knockout_unitsq }if }if }ifelse }ifelse }ifelse }ifelse cleartomark restore }ifelse currentdict/_Filters known{_Filters AGMIMG_flushfilters}if end }def /sep_imageormask { /sep_colorspace_dict AGMCORE_gget begin CSA map_csa begin AGMIMG_init_common SkipImageProc{ currentdict consumeimagedata }{ save mark AGMCORE_avoid_L2_sep_space{ /Decode[Decode 0 get 255 mul Decode 1 get 255 mul]def }if AGMIMG_ccimage_exists MappedCSA 0 get/DeviceCMYK eq and 
currentdict/Components known and Name()ne and Name(All)ne and Operator/image eq and AGMCORE_producing_seps not and level2 not and { Width Height BitsPerComponent ImageMatrix [ /DataSource load/exec cvx { 0 1 2 index length 1 sub{ 1 index exch 2 copy get 255 xor put }for }/exec cvx ]cvx bind MappedCSA 0 get/DeviceCMYK eq{ Components aload pop }{ 0 0 0 Components aload pop 1 exch sub }ifelse Name findcmykcustomcolor customcolorimage }{ AGMCORE_producing_seps not{ level2{ //Adobe_AGM_Core/AGMCORE_pattern_paint_type get 2 ne AGMCORE_avoid_L2_sep_space not and currentcolorspace 0 get/Separation ne and{ [/Separation Name MappedCSA sep_proc_name exch dup 0 get 15 string cvs(/Device)anchorsearch{pop pop 0 get}{pop}ifelse exch load]setcolorspace_opt /sep_tint AGMCORE_gget setcolor }if currentdict imageormask }{ currentdict Operator/imagemask eq{ imageormask }{ sep_imageormask_lev1 }ifelse }ifelse }{ AGMCORE_host_sep{ Operator/knockout eq{ currentdict/ImageMatrix get concat knockout_unitsq }{ currentgray 1 ne{ AGMCORE_is_cmyk_sep Name(All)ne and{ level2{ Name AGMCORE_IsSeparationAProcessColor { Operator/imagemask eq{ //Adobe_AGM_Core/AGMCORE_pattern_paint_type get 2 ne{ /sep_tint AGMCORE_gget 1 exch sub AGMCORE_&setcolor }if }{ invert_image_samples }ifelse }{ //Adobe_AGM_Core/AGMCORE_pattern_paint_type get 2 ne{ [/Separation Name[/DeviceGray] { sep_colorspace_proc AGMCORE_get_ink_data 1 exch sub }bind ]AGMCORE_&setcolorspace /sep_tint AGMCORE_gget AGMCORE_&setcolor }if }ifelse currentdict imageormask_sys }{ currentdict Operator/imagemask eq{ imageormask_sys }{ sep_image_lev1_sep }ifelse }ifelse }{ Operator/imagemask ne{ invert_image_samples }if currentdict imageormask_sys }ifelse }{ currentoverprint not Name(All)eq or Operator/imagemask eq and{ currentdict imageormask_sys }{ currentoverprint not { gsave knockout_unitsq grestore }if currentdict consumeimagedata }ifelse }ifelse }ifelse }{ //Adobe_AGM_Core/AGMCORE_pattern_paint_type get 2 ne{ currentcolorspace 0 get/Separation 
ne{ [/Separation Name MappedCSA sep_proc_name exch 0 get exch load]setcolorspace_opt /sep_tint AGMCORE_gget setcolor }if }if currentoverprint MappedCSA 0 get/DeviceCMYK eq and Name AGMCORE_IsSeparationAProcessColor not and //Adobe_AGM_Core/AGMCORE_pattern_paint_type get 2 ne{Name inRip_spot_has_ink not and}{false}ifelse Name(All)ne and{ imageormask_l2_overprint }{ currentdict imageormask }ifelse }ifelse }ifelse }ifelse cleartomark restore }ifelse currentdict/_Filters known{_Filters AGMIMG_flushfilters}if end end }def /colorSpaceElemCnt { mark currentcolor counttomark dup 2 add 1 roll cleartomark }bdf /devn_sep_datasource { 1 dict begin /dataSource xdf [ 0 1 dataSource length 1 sub{ dup currentdict/dataSource get/exch cvx/get cvx/exec cvx /exch cvx names_index/ne cvx[/pop cvx]cvx/if cvx }for ]cvx bind end }bdf /devn_alt_datasource { 11 dict begin /convProc xdf /origcolorSpaceElemCnt xdf /origMultipleDataSources xdf /origBitsPerComponent xdf /origDecode xdf /origDataSource xdf /dsCnt origMultipleDataSources{origDataSource length}{1}ifelse def /DataSource origMultipleDataSources { [ BitsPerComponent 8 idiv origDecode length 2 idiv mul string 0 1 origDecode length 2 idiv 1 sub { dup 7 mul 1 add index exch dup BitsPerComponent 8 idiv mul exch origDataSource exch get 0()/SubFileDecode filter BitsPerComponent 8 idiv string/readstring cvx/pop cvx/putinterval cvx }for ]bind cvx }{origDataSource}ifelse 0()/SubFileDecode filter def [ origcolorSpaceElemCnt string 0 2 origDecode length 2 sub { dup origDecode exch get dup 3 -1 roll 1 add origDecode exch get exch sub 2 BitsPerComponent exp 1 sub div 1 BitsPerComponent 8 idiv{DataSource/read cvx/not cvx{0}/if cvx/mul cvx}repeat/mul cvx/add cvx }for /convProc load/exec cvx origcolorSpaceElemCnt 1 sub -1 0 { /dup cvx 2/add cvx/index cvx 3 1/roll cvx/exch cvx 255/mul cvx/cvi cvx/put cvx }for ]bind cvx 0()/SubFileDecode filter end }bdf /devn_imageormask { /devicen_colorspace_dict AGMCORE_gget begin CSA map_csa 2 dict begin dup 
/srcDataStrs[3 -1 roll begin AGMIMG_init_common currentdict/MultipleDataSources known{MultipleDataSources{DataSource length}{1}ifelse}{1}ifelse { Width Decode length 2 div mul cvi { dup 65535 gt{1 add 2 div cvi}{exit}ifelse }loop string }repeat end]def /dstDataStr srcDataStrs 0 get length string def begin AGMIMG_init_common SkipImageProc{ currentdict consumeimagedata }{ save mark AGMCORE_producing_seps not{ level3 not{ Operator/imagemask ne{ /DataSource[[ DataSource Decode BitsPerComponent currentdict/MultipleDataSources known{MultipleDataSources}{false}ifelse colorSpaceElemCnt/devicen_colorspace_dict AGMCORE_gget/TintTransform get devn_alt_datasource 1/string cvx/readstring cvx/pop cvx]cvx colorSpaceElemCnt 1 sub{dup}repeat]def /MultipleDataSources true def /Decode colorSpaceElemCnt[exch{0 1}repeat]def }if }if currentdict imageormask }{ AGMCORE_host_sep{ Names convert_to_process{ CSA get_csa_by_name 0 get/DeviceCMYK eq{ /DataSource Width BitsPerComponent mul 7 add 8 idiv Height mul 4 mul DataSource Decode BitsPerComponent currentdict/MultipleDataSources known{MultipleDataSources}{false}ifelse 4/devicen_colorspace_dict AGMCORE_gget/TintTransform get devn_alt_datasource filter_cmyk 0()/SubFileDecode filter def /MultipleDataSources false def /Decode[1 0]def /DeviceGray setcolorspace currentdict imageormask_sys }{ AGMCORE_report_unsupported_color_space AGMCORE_black_plate{ /DataSource DataSource Decode BitsPerComponent currentdict/MultipleDataSources known{MultipleDataSources}{false}ifelse CSA get_csa_by_name 0 get/DeviceRGB eq{3}{1}ifelse/devicen_colorspace_dict AGMCORE_gget/TintTransform get devn_alt_datasource /MultipleDataSources false def /Decode colorSpaceElemCnt[exch{0 1}repeat]def currentdict imageormask_sys }{ gsave knockout_unitsq grestore currentdict consumeimagedata }ifelse }ifelse } { /devicen_colorspace_dict AGMCORE_gget/names_index known{ Operator/imagemask ne{ MultipleDataSources{ /DataSource[DataSource devn_sep_datasource/exec cvx]cvx def 
/MultipleDataSources false def }{ /DataSource/DataSource load dstDataStr srcDataStrs 0 get filter_devn def }ifelse invert_image_samples }if currentdict imageormask_sys }{ currentoverprint not Operator/imagemask eq and{ currentdict imageormask_sys }{ currentoverprint not { gsave knockout_unitsq grestore }if currentdict consumeimagedata }ifelse }ifelse }ifelse }{ currentdict imageormask }ifelse }ifelse cleartomark restore }ifelse currentdict/_Filters known{_Filters AGMIMG_flushfilters}if end end end }def /imageormask_l2_overprint { currentdict currentcmykcolor add add add 0 eq{ currentdict consumeimagedata }{ level3{ currentcmykcolor /AGMIMG_k xdf /AGMIMG_y xdf /AGMIMG_m xdf /AGMIMG_c xdf Operator/imagemask eq{ [/DeviceN[ AGMIMG_c 0 ne{/Cyan}if AGMIMG_m 0 ne{/Magenta}if AGMIMG_y 0 ne{/Yellow}if AGMIMG_k 0 ne{/Black}if ]/DeviceCMYK{}]setcolorspace AGMIMG_c 0 ne{AGMIMG_c}if AGMIMG_m 0 ne{AGMIMG_m}if AGMIMG_y 0 ne{AGMIMG_y}if AGMIMG_k 0 ne{AGMIMG_k}if setcolor }{ /Decode[Decode 0 get 255 mul Decode 1 get 255 mul]def [/Indexed [ /DeviceN[ AGMIMG_c 0 ne{/Cyan}if AGMIMG_m 0 ne{/Magenta}if AGMIMG_y 0 ne{/Yellow}if AGMIMG_k 0 ne{/Black}if ] /DeviceCMYK{ AGMIMG_k 0 eq{0}if AGMIMG_y 0 eq{0 exch}if AGMIMG_m 0 eq{0 3 1 roll}if AGMIMG_c 0 eq{0 4 1 roll}if } ] 255 { 255 div mark exch dup dup dup AGMIMG_k 0 ne{ /sep_tint AGMCORE_gget mul MappedCSA sep_proc_name exch pop load exec 4 1 roll pop pop pop counttomark 1 roll }{ pop }ifelse AGMIMG_y 0 ne{ /sep_tint AGMCORE_gget mul MappedCSA sep_proc_name exch pop load exec 4 2 roll pop pop pop counttomark 1 roll }{ pop }ifelse AGMIMG_m 0 ne{ /sep_tint AGMCORE_gget mul MappedCSA sep_proc_name exch pop load exec 4 3 roll pop pop pop counttomark 1 roll }{ pop }ifelse AGMIMG_c 0 ne{ /sep_tint AGMCORE_gget mul MappedCSA sep_proc_name exch pop load exec pop pop pop counttomark 1 roll }{ pop }ifelse counttomark 1 add -1 roll pop } ]setcolorspace }ifelse imageormask_sys }{ write_image_file{ currentcmykcolor 0 ne{ 
[/Separation/Black/DeviceGray{}]setcolorspace gsave /Black [{1 exch sub/sep_tint AGMCORE_gget mul}/exec cvx MappedCSA sep_proc_name cvx exch pop{4 1 roll pop pop pop 1 exch sub}/exec cvx] cvx modify_halftone_xfer Operator currentdict read_image_file grestore }if 0 ne{ [/Separation/Yellow/DeviceGray{}]setcolorspace gsave /Yellow [{1 exch sub/sep_tint AGMCORE_gget mul}/exec cvx MappedCSA sep_proc_name cvx exch pop{4 2 roll pop pop pop 1 exch sub}/exec cvx] cvx modify_halftone_xfer Operator currentdict read_image_file grestore }if 0 ne{ [/Separation/Magenta/DeviceGray{}]setcolorspace gsave /Magenta [{1 exch sub/sep_tint AGMCORE_gget mul}/exec cvx MappedCSA sep_proc_name cvx exch pop{4 3 roll pop pop pop 1 exch sub}/exec cvx] cvx modify_halftone_xfer Operator currentdict read_image_file grestore }if 0 ne{ [/Separation/Cyan/DeviceGray{}]setcolorspace gsave /Cyan [{1 exch sub/sep_tint AGMCORE_gget mul}/exec cvx MappedCSA sep_proc_name cvx exch pop{pop pop pop 1 exch sub}/exec cvx] cvx modify_halftone_xfer Operator currentdict read_image_file grestore }if close_image_file }{ imageormask }ifelse }ifelse }ifelse }def /indexed_imageormask { begin AGMIMG_init_common save mark currentdict AGMCORE_host_sep{ Operator/knockout eq{ /indexed_colorspace_dict AGMCORE_gget dup/CSA known{ /CSA get get_csa_by_name }{ /Names get }ifelse overprint_plate not{ knockout_unitsq }if }{ Indexed_DeviceN{ /devicen_colorspace_dict AGMCORE_gget dup/names_index known exch/Names get convert_to_process or{ indexed_image_lev2_sep }{ currentoverprint not{ knockout_unitsq }if currentdict consumeimagedata }ifelse }{ AGMCORE_is_cmyk_sep{ Operator/imagemask eq{ imageormask_sys }{ level2{ indexed_image_lev2_sep }{ indexed_image_lev1_sep }ifelse }ifelse }{ currentoverprint not{ knockout_unitsq }if currentdict consumeimagedata }ifelse }ifelse }ifelse }{ level2{ Indexed_DeviceN{ /indexed_colorspace_dict AGMCORE_gget begin }{ /indexed_colorspace_dict AGMCORE_gget dup null ne { begin currentdict/CSDBase 
known{CSDBase/CSD get_res/MappedCSA get}{CSA}ifelse get_csa_by_name 0 get/DeviceCMYK eq ps_level 3 ge and ps_version 3015.007 lt and AGMCORE_in_rip_sep and{ [/Indexed[/DeviceN[/Cyan/Magenta/Yellow/Black]/DeviceCMYK{}]HiVal Lookup] setcolorspace }if end } {pop}ifelse }ifelse imageormask Indexed_DeviceN{ end }if }{ Operator/imagemask eq{ imageormask }{ indexed_imageormask_lev1 }ifelse }ifelse }ifelse cleartomark restore currentdict/_Filters known{_Filters AGMIMG_flushfilters}if end }def /indexed_image_lev2_sep { /indexed_colorspace_dict AGMCORE_gget begin begin Indexed_DeviceN not{ currentcolorspace dup 1/DeviceGray put dup 3 currentcolorspace 2 get 1 add string 0 1 2 3 AGMCORE_get_ink_data 4 currentcolorspace 3 get length 1 sub { dup 4 idiv exch currentcolorspace 3 get exch get 255 exch sub 2 index 3 1 roll put }for put setcolorspace }if currentdict Operator/imagemask eq{ AGMIMG_&imagemask }{ use_mask{ process_mask AGMIMG_&image }{ AGMIMG_&image }ifelse }ifelse end end }def /OPIimage { dup type/dicttype ne{ 10 dict begin /DataSource xdf /ImageMatrix xdf /BitsPerComponent xdf /Height xdf /Width xdf /ImageType 1 def /Decode[0 1 def] currentdict end }if dup begin /NComponents 1 cdndf /MultipleDataSources false cdndf /SkipImageProc{false}cdndf /Decode[ 0 currentcolorspace 0 get/Indexed eq{ 2 BitsPerComponent exp 1 sub }{ 1 }ifelse ]cdndf /Operator/image cdndf end /sep_colorspace_dict AGMCORE_gget null eq{ imageormask }{ gsave dup begin invert_image_samples end sep_imageormask grestore }ifelse }def /cachemask_level2 { 3 dict begin /LZWEncode filter/WriteFilter xdf /readBuffer 256 string def /ReadFilter currentfile 0(%EndMask)/SubFileDecode filter /ASCII85Decode filter /RunLengthDecode filter def { ReadFilter readBuffer readstring exch WriteFilter exch writestring not{exit}if }loop WriteFilter closefile end }def /spot_alias { /mapto_sep_imageormask { dup type/dicttype ne{ 12 dict begin /ImageType 1 def /DataSource xdf /ImageMatrix xdf /BitsPerComponent xdf /Height xdf 
/Width xdf /MultipleDataSources false def }{ begin }ifelse /Decode[/customcolor_tint AGMCORE_gget 0]def /Operator/image def /SkipImageProc{false}def currentdict end sep_imageormask }bdf /customcolorimage { Adobe_AGM_Image/AGMIMG_colorAry xddf /customcolor_tint AGMCORE_gget << /Name AGMIMG_colorAry 4 get /CSA[/DeviceCMYK] /TintMethod/Subtractive /TintProc null /MappedCSA null /NComponents 4 /Components[AGMIMG_colorAry aload pop pop] >> setsepcolorspace mapto_sep_imageormask }ndf Adobe_AGM_Image/AGMIMG_&customcolorimage/customcolorimage load put /customcolorimage { Adobe_AGM_Image/AGMIMG_override false put current_spot_alias{dup 4 get map_alias}{false}ifelse { false set_spot_alias /customcolor_tint AGMCORE_gget exch setsepcolorspace pop mapto_sep_imageormask true set_spot_alias }{ //Adobe_AGM_Image/AGMIMG_&customcolorimage get exec }ifelse }bdf }def /snap_to_device { 6 dict begin matrix currentmatrix dup 0 get 0 eq 1 index 3 get 0 eq and 1 index 1 get 0 eq 2 index 2 get 0 eq and or exch pop { 1 1 dtransform 0 gt exch 0 gt/AGMIMG_xSign? exch def/AGMIMG_ySign? 
exch def 0 0 transform AGMIMG_ySign?{floor 0.1 sub}{ceiling 0.1 add}ifelse exch AGMIMG_xSign?{floor 0.1 sub}{ceiling 0.1 add}ifelse exch itransform/AGMIMG_llY exch def/AGMIMG_llX exch def 1 1 transform AGMIMG_ySign?{ceiling 0.1 add}{floor 0.1 sub}ifelse exch AGMIMG_xSign?{ceiling 0.1 add}{floor 0.1 sub}ifelse exch itransform/AGMIMG_urY exch def/AGMIMG_urX exch def [AGMIMG_urX AGMIMG_llX sub 0 0 AGMIMG_urY AGMIMG_llY sub AGMIMG_llX AGMIMG_llY]concat }{ }ifelse end }def level2 not{ /colorbuf { 0 1 2 index length 1 sub{ dup 2 index exch get 255 exch sub 2 index 3 1 roll put }for }def /tint_image_to_color { begin Width Height BitsPerComponent ImageMatrix /DataSource load end Adobe_AGM_Image begin /AGMIMG_mbuf 0 string def /AGMIMG_ybuf 0 string def /AGMIMG_kbuf 0 string def { colorbuf dup length AGMIMG_mbuf length ne { dup length dup dup /AGMIMG_mbuf exch string def /AGMIMG_ybuf exch string def /AGMIMG_kbuf exch string def }if dup AGMIMG_mbuf copy AGMIMG_ybuf copy AGMIMG_kbuf copy pop } addprocs {AGMIMG_mbuf}{AGMIMG_ybuf}{AGMIMG_kbuf}true 4 colorimage end }def /sep_imageormask_lev1 { begin MappedCSA 0 get dup/DeviceRGB eq exch/DeviceCMYK eq or has_color not and{ { 255 mul round cvi GrayLookup exch get }currenttransfer addprocs settransfer currentdict imageormask }{ /sep_colorspace_dict AGMCORE_gget/Components known{ MappedCSA 0 get/DeviceCMYK eq{ Components aload pop }{ 0 0 0 Components aload pop 1 exch sub }ifelse Adobe_AGM_Image/AGMIMG_k xddf Adobe_AGM_Image/AGMIMG_y xddf Adobe_AGM_Image/AGMIMG_m xddf Adobe_AGM_Image/AGMIMG_c xddf AGMIMG_y 0.0 eq AGMIMG_m 0.0 eq and AGMIMG_c 0.0 eq and{ {AGMIMG_k mul 1 exch sub}currenttransfer addprocs settransfer currentdict imageormask }{ currentcolortransfer {AGMIMG_k mul 1 exch sub}exch addprocs 4 1 roll {AGMIMG_y mul 1 exch sub}exch addprocs 4 1 roll {AGMIMG_m mul 1 exch sub}exch addprocs 4 1 roll {AGMIMG_c mul 1 exch sub}exch addprocs 4 1 roll setcolortransfer currentdict tint_image_to_color }ifelse }{ MappedCSA 0 get/DeviceGray 
eq{ {255 mul round cvi ColorLookup exch get 0 get}currenttransfer addprocs settransfer currentdict imageormask }{ MappedCSA 0 get/DeviceCMYK eq{ currentcolortransfer {255 mul round cvi ColorLookup exch get 3 get 1 exch sub}exch addprocs 4 1 roll {255 mul round cvi ColorLookup exch get 2 get 1 exch sub}exch addprocs 4 1 roll {255 mul round cvi ColorLookup exch get 1 get 1 exch sub}exch addprocs 4 1 roll {255 mul round cvi ColorLookup exch get 0 get 1 exch sub}exch addprocs 4 1 roll setcolortransfer currentdict tint_image_to_color }{ currentcolortransfer {pop 1}exch addprocs 4 1 roll {255 mul round cvi ColorLookup exch get 2 get}exch addprocs 4 1 roll {255 mul round cvi ColorLookup exch get 1 get}exch addprocs 4 1 roll {255 mul round cvi ColorLookup exch get 0 get}exch addprocs 4 1 roll setcolortransfer currentdict tint_image_to_color }ifelse }ifelse }ifelse }ifelse end }def /sep_image_lev1_sep { begin /sep_colorspace_dict AGMCORE_gget/Components known{ Components aload pop Adobe_AGM_Image/AGMIMG_k xddf Adobe_AGM_Image/AGMIMG_y xddf Adobe_AGM_Image/AGMIMG_m xddf Adobe_AGM_Image/AGMIMG_c xddf {AGMIMG_c mul 1 exch sub} {AGMIMG_m mul 1 exch sub} {AGMIMG_y mul 1 exch sub} {AGMIMG_k mul 1 exch sub} }{ {255 mul round cvi ColorLookup exch get 0 get 1 exch sub} {255 mul round cvi ColorLookup exch get 1 get 1 exch sub} {255 mul round cvi ColorLookup exch get 2 get 1 exch sub} {255 mul round cvi ColorLookup exch get 3 get 1 exch sub} }ifelse AGMCORE_get_ink_data currenttransfer addprocs settransfer currentdict imageormask_sys end }def /indexed_imageormask_lev1 { /indexed_colorspace_dict AGMCORE_gget begin begin currentdict MappedCSA 0 get dup/DeviceRGB eq exch/DeviceCMYK eq or has_color not and{ {HiVal mul round cvi GrayLookup exch get HiVal div}currenttransfer addprocs settransfer imageormask }{ MappedCSA 0 get/DeviceGray eq{ {HiVal mul round cvi Lookup exch get HiVal div}currenttransfer addprocs settransfer imageormask }{ MappedCSA 0 get/DeviceCMYK eq{ currentcolortransfer 
{4 mul HiVal mul round cvi 3 add Lookup exch get HiVal div 1 exch sub}exch addprocs 4 1 roll {4 mul HiVal mul round cvi 2 add Lookup exch get HiVal div 1 exch sub}exch addprocs 4 1 roll {4 mul HiVal mul round cvi 1 add Lookup exch get HiVal div 1 exch sub}exch addprocs 4 1 roll {4 mul HiVal mul round cvi Lookup exch get HiVal div 1 exch sub}exch addprocs 4 1 roll setcolortransfer tint_image_to_color }{ currentcolortransfer {pop 1}exch addprocs 4 1 roll {3 mul HiVal mul round cvi 2 add Lookup exch get HiVal div}exch addprocs 4 1 roll {3 mul HiVal mul round cvi 1 add Lookup exch get HiVal div}exch addprocs 4 1 roll {3 mul HiVal mul round cvi Lookup exch get HiVal div}exch addprocs 4 1 roll setcolortransfer tint_image_to_color }ifelse }ifelse }ifelse end end }def /indexed_image_lev1_sep { /indexed_colorspace_dict AGMCORE_gget begin begin {4 mul HiVal mul round cvi Lookup exch get HiVal div 1 exch sub} {4 mul HiVal mul round cvi 1 add Lookup exch get HiVal div 1 exch sub} {4 mul HiVal mul round cvi 2 add Lookup exch get HiVal div 1 exch sub} {4 mul HiVal mul round cvi 3 add Lookup exch get HiVal div 1 exch sub} AGMCORE_get_ink_data currenttransfer addprocs settransfer currentdict imageormask_sys end end }def }if end systemdict/setpacking known {setpacking}if %%EndResource currentdict Adobe_AGM_Utils eq {end} if %%EndProlog %%BeginSetup Adobe_AGM_Utils begin 2 2010 Adobe_AGM_Core/ds gx Adobe_CoolType_Core/ds get exec Adobe_AGM_Image/ds gx currentdict Adobe_AGM_Utils eq {end} if %%EndSetup %%Page: 1 1 %%EndPageComments %%BeginPageSetup %ADOBeginClientInjection: PageSetup Start "AI11EPS" %AI12_RMC_Transparency: Balance=75 RasterRes=300 GradRes=150 Text=0 Stroke=1 Clip=1 OP=0 %ADOEndClientInjection: PageSetup Start "AI11EPS" Adobe_AGM_Utils begin Adobe_AGM_Core/ps gx Adobe_AGM_Utils/capture_cpd gx Adobe_CoolType_Core/ps get exec Adobe_AGM_Image/ps gx %ADOBeginClientInjection: PageSetup End "AI11EPS" /currentdistillerparams where {pop currentdistillerparams /CoreDistVersion 
get 5000 lt} {true} ifelse { userdict /AI11_PDFMark5 /cleartomark load put userdict /AI11_ReadMetadata_PDFMark5 {flushfile cleartomark } bind put} { userdict /AI11_PDFMark5 /pdfmark load put userdict /AI11_ReadMetadata_PDFMark5 {/PUT pdfmark} bind put } ifelse [/NamespacePush AI11_PDFMark5 [/_objdef {ai_metadata_stream_123} /type /stream /OBJ AI11_PDFMark5 [{ai_metadata_stream_123} currentfile 0 (% &&end XMP packet marker&&) /SubFileDecode filter AI11_ReadMetadata_PDFMark5 Adobe Illustrator CS4 2009-09-08T17:05:09+02:00 2009-09-08T17:05:10+02:00 2009-09-08T17:05:10+02:00 256 256 JPEG /9j/4AAQSkZJRgABAgEASABIAAD/7QAsUGhvdG9zaG9wIDMuMAA4QklNA+0AAAAAABAASAAAAAEA AQBIAAAAAQAB/+4ADkFkb2JlAGTAAAAAAf/bAIQABgQEBAUEBgUFBgkGBQYJCwgGBggLDAoKCwoK DBAMDAwMDAwQDA4PEA8ODBMTFBQTExwbGxscHx8fHx8fHx8fHwEHBwcNDA0YEBAYGhURFRofHx8f Hx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8f/8AAEQgBAAEAAwER AAIRAQMRAf/EAaIAAAAHAQEBAQEAAAAAAAAAAAQFAwIGAQAHCAkKCwEAAgIDAQEBAQEAAAAAAAAA AQACAwQFBgcICQoLEAACAQMDAgQCBgcDBAIGAnMBAgMRBAAFIRIxQVEGE2EicYEUMpGhBxWxQiPB UtHhMxZi8CRygvElQzRTkqKyY3PCNUQnk6OzNhdUZHTD0uIIJoMJChgZhJRFRqS0VtNVKBry4/PE 1OT0ZXWFlaW1xdXl9WZ2hpamtsbW5vY3R1dnd4eXp7fH1+f3OEhYaHiImKi4yNjo+Ck5SVlpeYmZ qbnJ2en5KjpKWmp6ipqqusra6voRAAICAQIDBQUEBQYECAMDbQEAAhEDBCESMUEFURNhIgZxgZEy obHwFMHR4SNCFVJicvEzJDRDghaSUyWiY7LCB3PSNeJEgxdUkwgJChgZJjZFGidkdFU38qOzwygp 0+PzhJSktMTU5PRldYWVpbXF1eX1RlZmdoaWprbG1ub2R1dnd4eXp7fH1+f3OEhYaHiImKi4yNjo +DlJWWl5iZmpucnZ6fkqOkpaanqKmqq6ytrq+v/aAAwDAQACEQMRAD8A9TTTQwQvNM6xQxKXllch VVVFWZmOwAHU4QCTQQSALLwH8wf+cjLtppdP8nKsUKEq2ryqGd6HrDGw4qP8pwSfAZvtL2SKvJ8n Q6rtYnbHy73jOreYNc1iYzarqFxfSE15TyvJT5BiQB7DNxDFGAqIAdRPJKZuRtAZNg7FXYq7FXYq 7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FUfpPmDXdHmE2lahcWMnc28rx 170IUgEexyE8UZipAFnDJKBuJI9z2b8vf+cjLpZotO84qskLUVdXiXi6knrNGoow/wApACPA5p9V 2SKvH8nb6XtYjbJy73v0M0M8KTQussMqh4pUIZWVhVWVhsQR0OaEgg0XfAgiw8B/5yM/MKZrpfJ2 nS8YY1WXV2UkFnajRwn2UUdvEkeGb7snSivEPwdD2tqrPhjl1eFZu3SuxV2KuxV2KuxV2KuxV2Ku 
xV2KuxV2KuxV2KuxV2KuxV2KuxV2KuxV2KuxV2KuxV2KuxV2KuxV7t/zjn+YU4um8m6hIXhkV5tI ZiSVZQXlhH+SVq48KHxzSdraUV4g+LuuydUQfDPLo8Z8w6tLrGvahqsv2764luCDvQSOWC/IA0Gb jFDgiIjoHUZJmcjI9TaX5Ng7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FX Yq7FXYq7FXYq7FXYq7FUw8u6vLo+v6dqsRIexuIp9u4RwSvyI2OV5cfHEx7wzx5DCQkOhS/LGDsV ewfkB5F8q+aYtcOvWAvTaNbC3Jklj4+oJef926VrwHXNT2pqcmLh4DV3+h2fZmlx5jLjF1XUjv7n rn/Kjvys/wCrEv8A0kXX/VXNV/Kef+d9g/U7b+StP/N/2Uv1u/5Ud+Vn/ViX/pIuv+quP8p5/wCd 9g/Uv8laf+b/ALKX63f8qO/Kz/qxL/0kXX/VXH+U8/8AO+wfqX+StP8Azf8AZS/W7/lR35Wf9WJf +ki6/wCquP8AKef+d9g/Uv8AJWn/AJv+yl+t3/Kjvys/6sS/9JF1/wBVcf5Tz/zvsH6l/krT/wA3 /ZS/W7/lR35Wf9WJf+ki6/6q4/ynn/nfYP1L/JWn/m/7KX63f8qO/Kz/AKsS/wDSRdf9Vcf5Tz/z vsH6l/krT/zf9lL9bv8AlR35Wf8AViX/AKSLr/qrj/Kef+d9g/Uv8laf+b/spfrd/wAqO/Kz/qxL /wBJF1/1Vx/lPP8AzvsH6l/krT/zf9lL9bv+VHflZ/1Yl/6SLr/qrj/Kef8AnfYP1L/JWn/m/wCy l+t3/Kjvys/6sS/9JF1/1Vx/lPP/ADvsH6l/krT/AM3/AGUv1vDPz38p+X/LPmmystDtBZ2stik0 kYeSSshmlUtWRnPRRm77Nzzy4yZGzf6A6TtHBDFl4YChwg/ae95tmwcF9ef8qO/Kz/qxL/0kXX/V XOU/lPP/ADvsH6nqf5K0/wDN/wBlL9bv+VHflZ/1Yl/6SLr/AKq4/wAp5/532D9S/wAlaf8Am/7K X63f8qO/Kz/qxL/0kXX/AFVx/lPP/O+wfqX+StP/ADf9lL9bxL8+vJ/lzyxrmmW2hWYs4J7ZpJkD yScnEhWtZGc9M3PZmonlgTM3u6ftHTwxTAgK28z97zDNk652KuxV2KuxV9G/lL+VXkHXfy+0rVdV 0pbm/uPrHrTmadOXC5ljX4UkVdlUDYZz2u12WGUxiaAroO53+i0GHJiEpDc31Pf72X/8qO/Kz/qx L/0kXX/VXMX+U8/877B+pyv5K0/83/ZS/W7/AJUd+Vn/AFYl/wCki6/6q4/ynn/nfYP1L/JWn/m/ 7KX62EfnH+V/kTy/5Eu9T0jS1tb6OWBUmEs70DyBW2eRl3B8MzNBrcuTKIyNj3Bw9doMWPEZRFH3 n9b56zfuidirsVdirsVe/f8AOLP9z5l/1rP9U+aLtr+D4/od32Lzn/m/757xmid87FXYq7FXYq7F XYq7FXYq7FXYq+aP+cnP+U203/tmp/yfmzpOx/7o/wBb9AeZ7X/vh/VH3yeQZtnWPvbOFe5dirsV fOP/ADlB/wApLo3/ADBt/wAnTnRdjfRL3vO9sf3g9zxfNw6h2KuxV2KuxV9b/kOyt+VWigEEqboM Aeh+tymh+g5ynaf9/L4fcHquzf7iPx+8s/zAc92KvOP+cgv/ACWN9/xmtv8Ak8ubHsr++HuLru1P 7k/B8oZ1Ly7sVdirsVdir37/AJxZ/ufMv+tZ/qnzRdtfwfH9Du+xec/83/fPeM0TvnYq7FXYq7FX Yq7FXYq7FXYq7FXzR/zk5/ym2m/9s1P+T82dJ2P/AHR/rfoDzPa/98P6o++TyDNs6x97Zwr3LsVd 
ir5x/wCcoP8AlJdG/wCYNv8Ak6c6Lsb6Je953tj+8HueL5uHUOxV2KuxV2Kvq/8A5x9/8ljY/wDG a5/5PNnLdq/3x9weo7L/ALkfF6Pmudi7FXnH/OQX/ksb7/jNbf8AJ5c2PZX98PcXXdqf3J+D5Qzq Xl3Yq7FXYq7FXp/5L/mfoHkePV11a3u5zfm3MP1VI3p6Ik5cvUki/wB+ClM1vaGjnm4eEja+fwdj 2fq44TLiveuXlf63pf8A0M35D/5YNV/5FW//AGUZrf5Hy98ft/U7P+WMXdL7P1u/6Gb8h/8ALBqv /Iq3/wCyjH+R8vfH7f1L/LGLul9n63f9DN+Q/wDlg1X/AJFW/wD2UY/yPl74/b+pf5Yxd0vs/W7/ AKGb8h/8sGq/8irf/sox/kfL3x+39S/yxi7pfZ+t3/QzfkP/AJYNV/5FW/8A2UY/yPl74/b+pf5Y xd0vs/W7/oZvyH/ywar/AMirf/sox/kfL3x+39S/yxi7pfZ+t3/QzfkP/lg1X/kVb/8AZRj/ACPl 74/b+pf5Yxd0vs/W7/oZvyH/AMsGq/8AIq3/AOyjH+R8vfH7f1L/ACxi7pfZ+t3/AEM35D/5YNV/ 5FW//ZRj/I+Xvj9v6l/ljF3S+z9bv+hm/If/ACwar/yKt/8Asox/kfL3x+39S/yxi7pfZ+t3/Qzf kP8A5YNV/wCRVv8A9lGP8j5e+P2/qX+WMXdL7P1vIPzi8+aR508xWmpaXDcQwQWa2zrdKiOXWWR6 gRvIKUcd822g00sMDGVc72+Dp9fqI5snFHlw19p/WwTM5w33tnCvcuxV2KvnH/nKD/lJdG/5g2/5 OnOi7G+iXved7Y/vB7ni+bh1DsVdirsVdir6v/5x9/8AJY2P/Ga5/wCTzZy3av8AfH3B6jsv+5Hx ej5rnYuxV5x/zkF/5LG+/wCM1t/yeXNj2V/fD3F13an9yfg+UM6l5d2KuxV2KuxV2KuxV2KuxVHa Fo15rWsWmk2XH61eyCGH1DxXk3SpoaZDJkEImR5BnjgZyERzL0X/AKFv/MT+ax/5Ht/zRmv/AJWw +fydh/JOby+bv+hb/wAxP5rH/ke3/NGP8rYfP5L/ACTm8vm7/oW/8xP5rH/ke3/NGP8AK2Hz+S/y Tm8vm8613RrzRdYu9JveP1qykMM3pnkvJetDQVzYY8gnESHIuvyQMJGJ5hA5Ng7FXYq7FXYq+9s4 V7l2KuxV84/85Qf8pLo3/MG3/J050XY30S97zvbH94Pc8XzcOodirsVdirsVfV//ADj7/wCSxsf+ M1z/AMnmzlu1f74+4PUdl/3I+L0fNc7F2KvOP+cgv/JY33/Ga2/5PLmx7K/vh7i67tT+5PwfKGdS 8u7FXYq7FXYq7FXYq7FXYqyv8qf/ACY3l7/mMjzF1v8Acy9zk6P++j732XnHvYOxV2KvjT81v/Jj eYf+YyTOw0X9zH3PH6z++l72KZlOM7FXYq7FXYq+9s4V7l2KuxV84/8AOUH/ACkujf8AMG3/ACdO dF2N9Eve872x/eD3PF83DqHYq7FXYq7FX1f/AM4+/wDksbH/AIzXP/J5s5btX++PuD1HZf8Acj4v R81zsXYq84/5yC/8ljff8Zrb/k8ubHsr++HuLru1P7k/B8oZ1Ly7sVdirsVdirsVdirsVdirK/yp /wDJjeXv+YyPMXW/3Mvc5Oj/AL6PvfZece9g7FXYq+NPzW/8mN5h/wCYyTOw0X9zH3PH6z++l72K ZlOM7FXYq7FXYq+9s4V7l2KuxV84/wDOUH/KS6N/zBt/ydOdF2N9Eve872x/eD3PF83DqHYq7FXY q7FX1f8A84+/+Sxsf+M1z/yebOW7V/vj7g9R2X/cj4vR81zsXYq84/5yC/8AJY33/Ga2/wCTy5se 
yv74e4uu7U/uT8HyhnUvLuxV2KuxV2KuxV2KuxV2Ksr/ACp/8mN5e/5jI8xdb/cy9zk6P++j732X nHvYOxV2KvjT81v/ACY3mH/mMkzsNF/cx9zx+s/vpe9imZTjOxV2KuxV2KvvbOFe5dirsVfOP/OU H/KS6N/zBt/ydOdF2N9Eve872x/eD3PF83DqHYq7FXYq7FX1f/zj7/5LGx/4zXP/ACebOW7V/vj7 g9R2X/cj4vR81zsXYq84/wCcgv8AyWN9/wAZrb/k8ubHsr++HuLru1P7k/B8oZ1Ly7sVdirsVdir sVdirsVdirK/yp/8mN5e/wCYyPMXW/3Mvc5Oj/vo+99l5x72DsVdir40/Nb/AMmN5h/5jJM7DRf3 Mfc8frP76XvYpmU4zsVdirsVdir72zhXuXYq7FXzj/zlB/ykujf8wbf8nTnRdjfRL3vO9sf3g9zx fNw6h2KuxV2KuxV9X/8AOPv/AJLGx/4zXP8AyebOW7V/vj7g9R2X/cj4vR81zsXYq84/5yC/8ljf f8Zrb/k8ubHsr++HuLru1P7k/B8oZ1Ly7sVdirsVdirsVdirsVdirK/yp/8AJjeXv+YyPMXW/wBz L3OTo/76PvfZece9g7FXYq+NPzW/8mN5h/5jJM7DRf3Mfc8frP76XvYpmU4zsVdirsVdir72zhXu XYq7FXzj/wA5Qf8AKS6N/wAwbf8AJ050XY30S97zvbH94Pc8XzcOodirsVdirsVfV/8Azj7/AOSx sf8AjNc/8nmzlu1f74+4PUdl/wByPi9HzXOxdirzj/nIL/yWN9/xmtv+Ty5seyv74e4uu7U/uT8H yhnUvLuxV2KuxV2KuxV2KuxV2Ksr/Kn/AMmN5e/5jI8xdb/cy9zk6P8Avo+99l5x72DsVdir40/N b/yY3mH/AJjJM7DRf3Mfc8frP76XvYpmU4zsVdirsVdir7l8tXwv/LmlXw6Xdnbziv8AxZErfxzi s0eGch3EvaYJ8UInvATHKm12KvE/+cndBluNF0nXIlJWxme3uKAmiXABRj4ANFT5tm67Gy1KUe/9 Dpe2MZMYy7tvm+dc6B0DsVdirsVdir7O/LHQJtA8haNpk4K3EcHqzowoVknYzOh/1Wk4/RnH63KJ 5ZSHL9Wz1+ixHHijE8/17sozFcp2KvKP+clLwQ+QIIARyur+JOPfiscjk/eoza9jxvKT3R/U6rti VYgO+X6C+YM6V5t2KuxV2KuxV2KuxV2KuxVlf5U/+TG8vf8AMZHmLrf7mXucnR/30fe+y8497B2K uxV8afmt/wCTG8w/8xkmdhov7mPueP1n99L3sUzKcZ2KuxV2KuxV9ZfkLr66r+XNlCXLXGlvJZTV 60Q84qe3pOo+jOV7UxcOYnpLd6jsvLxYQOsdnoma92LsVQGvaJYa5o93pGoJ6lnexmKVdqiu4Za9 GVqMp7EZZiyGEhIcw15cQyRMZci+OPPHknWPKGuS6ZqKEpUtaXQFEnirs69f9kvY51+m1EcseIPI 6jTyxS4Zf2sfy9odirsVerfkb+V1zr+rw+YNTh46DYSc4g42uZ0Pwqo7ojbuehpx8aavtLWjHHgj 9Z+x2fZ2jOSXGfoH2/j9j6gzmXpnYq7FXzn/AM5Oa+lxrul6HEwP1CF7i4oeklwQFVh4qkfL5NnQ 9j4qgZd/6Hnu2MtzEf5o+/8AH2vFc3Lp3Yq7FXYq7FXYq7FXYq7FWV/lT/5Mby9/zGR5i63+5l7n J0f99H3vsvOPewdirsVfGn5rf+TG8w/8xkmdhov7mPueP1n99L3sUzKcZ2KuxV2KuxV6l/zj55zT Q/NzaTdPxsdcCQAnotypPon/AGXJk+ZHhms7U0/Hj4hzj93V2XZeo4MnCeUvv6PqTOYendirsVSr 
zL5X0LzLpj6brNqt1bMeS12dHHR43HxK3uPl0y3DnnjlxRNFqzYIZI8MhYeBefv+ceptC06+1nSN UWfTrON55Le7UrMqIK0V4wVkP+xXN9pe1RMiMh6j3fj9bodV2UYAyibiO/n+37HkenWUt/qFtYwl Vlu5UgjZ6hQ0jBQWoCaVPhm1lLhBPc6uMeIgDq+hPJ//ADjXpNlNHdeZr39JSLQ/UbcNHBy/y3P7 xx8gv05odR2vI7QFef4/a73T9kAb5DfkP1/2PZre2t7aCO3tokgt4VCRQxqERFUUCqq0AA8BmmJJ Nnm7mMQBQ5KmBLsVQurapY6Tpl1qV9IIrOzjaaeQ9lQVNB3J6Adzk8cDOQiOZYZJiETI8g+J/NPm C68w+Yb/AFq62lvpmk4VrwTpHGD4IgCj5Z2eHEMcBEdHjc2U5JGR5lK8sa3Yq7FXYq7FXYq7FXYq 7FWV/lT/AOTG8vf8xkeYut/uZe5ydH/fR977Lzj3sHYq7FXxp+a3/kxvMP8AzGSZ2Gi/uY+54/Wf 30vexTMpxnYq7FXYq7FXAlSCDQjcEdQcVfWP5NfmbD5u0RbO+lUeYbBAt2h2M0Y2WdR3r0enRvAE Zy3aGj8KVj6D+Keo7P1nixon1j7fP9b0XNc7F2KuxVi/5o/+S78xf8wM3/EcytF/fR97ja3+5l7n yN5T/wCUq0b/AJjrb/k8udXn+iXuLymD64+8PuHOKe0dirsVdir5x/P780I9VuD5U0ebnp1q4bUr hGqs0y7iJadUjO58W/1anouy9FwDxJczy8nne09Zxnw4/SOfn+Pv9zxfNw6h2KuxV2KuxV2KuxV2 KuxV2Ksr/Kn/AMmN5e/5jI8xdb/cy9zk6P8Avo+99l5x72DsVdir40/Nb/yY3mH/AJjJM7DRf3Mf c8frP76XvYpmU4zsVdirsVdirsVRuia1qeiapb6ppk7W99atzhlX5UIIOxVgaEHqMhkxxnExluCy hMwkJDYh9Uflj+cGi+cLaO0uWSx8wqtJbJjRZSBu9uSfiHfj9pfcb5zGs0EsRsbw/HN6fR6+OUUd p/f7noOa92DsVYv+aP8A5LvzF/zAzf8AEcytF/fR97ja3+5l7nyN5T/5SrRv+Y62/wCTy51ef6Je 4vKYPrj7w+4c4p7R2KtO6IjO7BUUFmZjQADckk4gK8D/ADb/AD4SSOfQfKM9Vasd5rCbbdCtsfvB k/4H+bN9oezK9eT5fr/U6HXdp3cMfz/V+v5PBs3jpHYq7FXYq7FXYq7FXYq7FXYq7FWU/lbJHH+Y nl95GCIt5GWZjQD5k5jawXhl7nJ0ZrLH3h9ifpTTP+WuH/kYn9c5Hw5dxeu4497v0ppn/LXD/wAj E/rj4cu4rxx73fpTTP8Alrh/5GJ/XHw5dxXjj3vjv80pI5PzE8wPGwdGvJCrKag/IjOu0YrDH3PI 6w3ll7yxbMlxnYq7FXYq7FXYq7FW45JI5FkjYpIhDI6kggjcEEYkK9b8k/8AORPmPSVjs/MEf6Zs VoouK8btRsN3Pwy0H825/mzVansqE94ek/Y7TTdqzhtL1D7f2/jd7J5f/Ob8utbRfS1aOynIq1vf /wCjMK9uTn02P+q5zT5ez80Ol+7d3GLtHDPrXv2/YivzKuILj8tfME1vIk0L2ExSSNgykcexFRkd GCM0Qe9nqyDhkR3Pkryn/wApVo3/ADHW3/J5c6rP9EvcXlcH1x94faupa1o+lx+rqV9b2Mf89xKk Q+9yM42GOUvpBL2M8sYfUQPe888zf85C+RNKR0055NauxUKluDHCGHZppANvdFbNhh7Kyy+r0h1+ btXFH6fUfx+Orw3z1+bnm7zfyt7qYWelE1XTraqxmhqPUY/FIfmaeAGbvTaHHi3G8u90mp1uTNsf 
p7mFZmOI7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq2ru oIViAdjQ0qMUU0CQajYjocUuJJNSak9ScVAdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVd irsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdi rsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdir sVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVTvyP PpsHm/SG1SCK405rmOK7inUNH6Up9N2YH+UNy+jKdQJHHLh503acgZBxcr3t9Ta1+Uvka60a/trL Q7K3vJ7eWO2nWJVaOVkIRwe3FqHOYx67KJAmRq3psuhxmJAiAafHzKVJVhQg0IPjnWvJvQ/yL8pW fmPzuqahbpc6bY28txcwyryjckelGp/2UnIf6uYHaWc48Wxokuf2dgGTLvyAt6V+eX5a+WrLyNJq miaXb2Nxp88Uk7wIEZoXPpMu3X4pFb6M1vZusnLLwyJNh2PaWkhHFxRAFF85Z0Lz73f/AJx78gaB q+ganqut6fBfrJcrb2q3CB+AhTk7LX+YygH/AFc0fauqnCQjE1s7vsrTQnGUpC96/HzS7/nIjSPK mhto+maLpltY3Uwlubp4Iwj+mKJGCR2J5/dlvZWTJPilIkhr7VhjgYxiADzP6P0vGc27qHYq+k/y C8seWtS8grc6hpNneXH1uZfWuLeKV+I40HJ1JoM53tTPOOWoyIFd70HZmnxzxXKMSb6gPPv+citJ 0vTPO1lb6bZwWMDabFI0VtGkSFzPOCxVAorRQK5n9lZJSxkyJPq6+4OD2pijDIBEADh6bdS8tzZu tdirsVfV/wCVPlHypefl5odzd6LYXNzLATLPLbQu7HmwqzMpJzl9dqMkc0gJSA95em0WlxSxRJjE n3Bln+BfJP8A1L2m/wDSHB/zRmJ+ay/zpfMuV+Sw/wAyP+lDv8C+Sf8AqXtN/wCkOD/mjH81l/nS +ZX8lh/mR/0oY3+ZHk/yla+QteuLXRLCC4ispWimitYUdWC7FWVQQcydJqMhyxBlKr7y4+r0mKOK REIg13B8kZ1Ty7sVdir3j/nG/wAv6Dqmla0+p6ba3zxTwiNrmCOYqCjEhS6tTNJ2tlnAx4SR7i7r srBCYlxREuXMWnn52flNpd35cOseXdPhtL/S1aSa3tIkiWe36v8ACgUF4/tD2qN9so7O10hPhmbE u/vbu0NBHg4sYAMeg6j9j5rzonn3pP8Azj/pmm6l59a31G0hvbf6lM3o3EaSpyDJQ8XBFRXNd2pO UcVxNG3YdmY4zy1IAiur1L88/K3ljT/y6vbqw0iytLlZrcLPBbxROAZVBAZFB3Gazs3PklmAMiRv 1dl2jpsUMRMYxB25APmTOkeddirsVdirsVdir7b8ka4Nd8o6RqxcPJd2sbTsOnrAcZR9EisM4zU4 uDJKPcXstNl8THGXePt6/a+TfzS0YaN+YOu2KgLH9ZaeJV6CO4AnRR/qrJTOq0eTjxRPl92zy2sx 8GWQ8/v3e0/84y6CbXyxqOsyKRJqVwIoie8VsKVHzkkcH5Zpu2MtzEe4fe7jsfFUDLvP3PVPMOjw 61oOoaTNQR39vLbliK8TIpUN81JqM1eLJwTEu4u0zY+OBj3h8OTwS288kEylJYmKSIdiGU0IPyOd qDe7xRFPsX8pNF/Q/wCXWh2pFJJLcXUu1DyuSZqH3UOF+jOR1+TjzSPnXy2et0GPhwxHlfz3fOv5 
5a6dX/MjUuL84NP4WMPt6I/eD/kcz50PZuLgwjz3/HwdB2jl48x8tvl+22A5nOC7FX1N/wA45f8A kuV/5jJ/+Nc5jtb+++Aem7J/ufi81/5yb/5Tyw/7ZUX/AFEXGbHsf+6P9b9Add2x/ej+r+kvI82z qnYq7FX2L+T3/ks9A/5hz/ycbOR7Q/v5PW9n/wBzF4B+eOp6lB+aOtRQ3c0Ua/VeKJIyqK2kJNAD m+7NgDgjt3/eXRdpSIzy37vuDBf0zrH/AC3XH/I1/wCuZ3BHucHjPetk1XVJUaOS8neNhRkaRyCP cE4iAHRTInqhckxdirsVfQ//ADi5/wAcfXv+YiH/AIg2aDtr6o/F3/Y3KXwe35pHdPlD87fy7/wp 5jN5YxcdD1QtJbcR8MMvWSDboB9pPbb9k51XZ2r8WFH6o/i3lu0NJ4U7H0nl+pGf843/APkxG/5g Z/8AiSZDtb+5+IZ9k/33wL1z/nIL/wAljff8Zrb/AJPLmp7K/vh7i7XtT+5PwfKGdS8u7FXYq7FX Yq7FX0z/AM41a79c8mXWku/KXSro8E/lhuB6i/fIJM5vtjFWQS/nD7vwHo+yMt4zH+afsP7bYX/z k3on1fzPperooCahatC9Opltn3J/2EqD6Mzex8lwMe4/e4XbGOsgl3j7v7Q908jaEvl7ydpOksPT e0tk+sCuwmYc5j/yMZjmj1OXxMkpd5/sd5pcXh44x7h/ahvy986W/nDQpNUgXgI7u4tyngqPWKu5 3MLoT75LV6Y4Z8PkPx82Gk1IzQ4vM/j5Pnn8zvJcq/nG+kQqUj166gnt29rxwJG+Ql5/dnQaPUf4 PxH+Efd+x0Gs09ajhH8R+/8Aa+oNRvbXSNHur2ReNpp9u8zqvaOBCxA/2K5zMImcgOpL005CESeg D4bvbye9vJ7y4bnPcyPNM3i8jFmP3nO2jEAUHipSJNnmo4UOxV9Tf845f+S5X/mMn/41zmO1v774 B6bsn+5+LzX/AJyb/wCU8sP+2VF/1EXGbHsf+6P9b9Add2x/ej+r+kvI82zqnYq7FX2L+T3/AJLP QP8AmHP/ACcbOR7Q/v5PW9n/ANzFJPOP5EaB5p8x3eu3eo3cFxeenzii9PgPSiWIU5KTuErl2n7T ligIADZp1HZsMszIk7pL/wBCweVf+rvffdD/AM0Zd/LM/wCaGn+R4fzi8x/OD8ttM8j3emQ2F1Pd LfRyvIZ+FVMbKBTgF/mzZ6DVyzAkiqdbr9JHCQAbt57me4DsVdir6H/5xc/44+vf8xEP/EGzQdtf VH4u/wCxuUvgzvz15/j8p+YfLcN6Quk6s1zBeSmg9Jl9H0pan9lS55exr2zB02l8WEyPqjVfa5mq 1fhZIA/TK7+zdN/OvlLT/Nnlu60a8oomXlbT05GKZf7uRfkeviKjvlOmznFMSDdqcAywMT8PIvBf yI0i/wBG/Nm80vUIjDeWdrcRTRnxVk3B7qRuD3G+b3tOYnpxIciQ6TsyBhnMTzAL1D/nIL/yWN9/ xmtv+Ty5rOyv74e4uy7U/uT8HyhnUvLuxV2KuxV2KuxV6x/zjdrn1LzzNpjsRFq1q6Ig6GaD96pP yjEmavtfHxYuL+aXadk5eHLw/wA4ft/W9z8/eS4/NH6CDKhXTdThu5+f7Vugb1Y/9meOaTS6jwuL zjXxd1q9N4vD5Sv4KX5ueYBoX5e6xdK3GeeH6pbb0PqXH7uq+6qxb6MOgxceaI+PyRr8vBhke/b5 vJ/+cYdf9LVtW0GRvhuoVu4ATtzhPBwPdlkB/wBjm07ZxXGM+7Z1fY+WpGPeL+T1zXfJUepefPLv mXitNJjuUuK9W5pSCn+o7u2arFqeHFKH86v2u1y6bizRn/Nu/wBCS/n9rn6M/Li7hVis2pyxWUZB 
3ox9SSvsY4mX6cu7Lx8WYH+bu09qZOHCR/ONfpfJ+dS8u7FXYq+pv+ccv/Jcr/zGT/8AGucx2t/f fAPTdk/3Pxea/wDOTf8Aynlh/wBsqL/qIuM2PY/90f636A67tj+9H9X9JeR5tnVOxV2KvsX8nv8A yWegf8w5/wCTjZyPaH9/J63s/wDuYvPfzK/PTzb5X866joVhaWEtpZ+j6TzxzNIfVgjlbkUmRftO abZn6Ps3HlxCRJs+7v8Ac4Gr7SyY8hiAKH6vexj/AKGb8+f8sGlf8irj/sozJ/kfF3y+z9Tjfyxl 7o/b+thvn38x9c87z2c2qwW0DWSukQtUkQESEE8vUkl/l7ZmaXSRwgiN797ianVyzEGVbdzFcynF dirsVfQ//OLn/HH17/mIh/4g2aDtr6o/F3/Y3KXwQ3/OU39z5a/1rz9UGS7F/j+H6WvtrnD/ADv9 6n35AfmJ+ndEPl7UJOWq6Sg9B2PxTWoNFP8ArRbKfbj75R2ppOCXGPpl9/7XI7L1XHHgP1R+79jO 5vKFg3nS281xUivY7WSyuQBtKjFWjY/5SUIr3B9hmCNQfCOM8rtzjpx4oyDnVMX/AOcgv/JY33/G a2/5PLmT2V/fD3Fxu1P7k/B8oZ1Ly7sVdirsVdirsVTbyjrZ0PzRpWr1IWyuopZQvUxhh6i/7JKj Ks+PjgY94bMOTgmJdxfcAIIBBqDuCM4p7V4J/wA5QeYNtG8vRt156hcrT5wwmv8AyNze9jYvqn8P 1/odF2zl3jD4/oH6Xk/5ceYP8P8AnjR9UZwkMVwqXLHoIZqxSn6EcnNrq8XiYpR8nVaXL4eSMu4/ Z1+x9p5xr2T53/5yf1z1dY0fREO1rA93NQ1BaduCgjxURE/7LOh7Gx1GUu818nn+2MlzjHuF/P8A seI5uXTOxV2Kvqb/AJxy/wDJcr/zGT/8a5zHa3998A9N2T/c/F5r/wA5N/8AKeWH/bKi/wCoi4zY 9j/3R/rfoDru2P70f1f0l5Hm2dU7FXYq+xfye/8AJZ6B/wAw5/5ONnI9of38nrez/wC5i8m/Nn8p vzA178wNV1bSdK+s6fc/V/Rm+sW0fL07aONvhkkRhRkI3GbXQ67FDEIyNEX0Pe6vXaHLPKZRFg11 HcxD/lRH5rf9WP8A6erP/qtmX/KeD+d9h/U4n8m5/wCb9o/WhdU/Jz8x9K0641G/0j0bK0jMtxL9 YtW4ooqTxSVmP0DJQ1+GRERLc+R/UxnoM0QSY7DzH62GZmOG7FXYq+h/+cXP+OPr3/MRD/xBs0Hb X1R+Lv8AsblL4Ib/AJym/ufLX+tefqgyXYv8fw/S19tc4f53+9eJ+XNf1Dy9rlnrOntxurOQSIDX iw6MjUp8LqSp9s3OXEMkTE8i6fFkMJCQ5h9n+V/MeneZNBs9Z09q292gbgftI42eNqftI1Qc47Ph OOZiej2ODMMkBIdWG/8AOQX/AJLG+/4zW3/J5czOyv74e4uH2p/cn4PlDOpeXdirsVdirsVdirsV fUHkj87vIcXlHSYNY1X6tqdvbRwXUTxTu3OEenyLIjqeYXl175zWp7NynJIxFxJ7w9Fpu08QxxEj UgO4n9Dwv80/NMHmfzzqWqWrmSxLLDZMQRWGFQgYBqEByC+/jm70WE48QiefV02szjLkMhy6MTzK cZ9S+Ufzy8h/4Y0tdZ1b6vqqW0cd7G8M7n1Y14M3JI2U8qcuvfOZz9mZeM8I9N7bh6TB2piEAJH1 Vvsf0B4D+ZnmWHzJ551bVrd/UtJZRHaOAwDQwqIkYBqEcgnKh8c3ujw+HijE83R6rMMmSUhyLGMy XHdirsVe7fk3+bHkvyx5NGl6xdSQ3guZZeCQySDi/Gh5KCO2aTtDQ5MuTiiNqd32frceLHwyO9sK 
/O7zjoXmvzXaajosrTWsVhHbuzo0Z9RZpXIowB+zIMzOztPLFjIlzv8AU4faOojlyAx5V+t59me4 DsVdir6M/Lr86PIOieSdJ0rULuWO9tISk6LBI4B5sdmUEHY5z+r7PyzymQGxeg0naGKGIRkdwyP/ AKGC/LH/AJbpv+kab/mnMb+Ss3cPm5H8qYe/7Hf9DBflj/y3Tf8ASNN/zTj/ACVm7h81/lTD3/Yk Xnr87fy+1fydrGmWN5K93eWskMCNBKoLsKAEkUGX6bs7LDJGRGwPe06ntHFPHKIO5Hc+bc6J512K uxV7B+Rf5keVPKOnarBrdxJDJdTRPCEieSoVSD9kGnXNT2lpJ5TEx6O17N1cMQlxdVH89vzD8r+b 49FXQ53nNk1ybjnG8dPVEXGnICv2DkuzNLPFxcXWv0se09TDKY8PS/tp5Lm0dY9M/JX80ofJ+oXF jqzv+gb0GRyoLmGdRs6qOocDi1PY9s1vaOi8YAx+oOx7P1ngyIl9J+9mH5ufm55I8yeSLrSdJupZ b2WWF0R4ZEFEkDN8TADoMxNBoMuPKJSGzma7XYsmIxid3geb10TsVdiqP8waTNo+u6hpUw/eWNxL bk+PpuVBFexAqMhimJxEh1DPJAwkYnoaQGTYOxV2KuxV2KuxV2KuxV2KuxV2KuxV2KuxV2KuxV2K uxV2KuxV2KuxV2KuxV2KuxV2KuxV2KuxVH+X9Jm1jXdP0qEfvL64itwfD1HCkmnYA1OQyzEImR6B njgZyER1NPZv+cjPy9mW6Xzjp0XKGRVi1dVBJV1oscx9mFEbwIHjmn7J1Qrwz8Hb9raWj4g5dXhW bt0rsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsV e6/845/l7M103nHUYuMMatFpCsCCztVZJh7KKoviSfDNJ2tqhXhj4u67J0tnxDy6PfpoYZ4XhmRZ YZVKSxOAysrCjKynYgjqM0IJBsO+IBFF4D+YX/OOd0s0uo+TmWSFqs2kStxdST0hkY0Yf5LkEeJz faXtYVWT5uh1XZJG+Pl3PGdW8v65o8xh1XT7ixkG1J4njr8iwAI9xm4hljMXEguonjlA1IUgMmwd irsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdiqP0ny/rusTCHStP uL6TuLeJ5KdqkqCAPc5CeWMBciAzhjlM1EE+57N+Xv8AzjndNNFqHnEiOFSGXSIn5OxB6TSKaKP8 lCSfEZp9V2sKrH83b6XsknfJy7nv0MMMEKQwosUMShIokAVVVRRVVRsAB0GaEkk2XfAACg//2Q== 1 False False 297.038889 209.902778 Millimeters 100c 38m 0y 15k Standard-Farbfeldgruppe 0 100c 38m 0y 15k SPOT 100.000000 CMYK 100.000000 38.000000 0.000000 14.999999 application/postscript TULogo_CMYK proof:pdf xmp.did:0180117407206811AE56EFAE3BDF0A74 xmp.iid:0180117407206811AE56EFAE3BDF0A74 uuid:71d9978c-29ba-844b-a3d0-e3f8298f110b uuid:00caa9d2-7df9-4944-92fb-487c773ddedf uuid:71d9978c-29ba-844b-a3d0-e3f8298f110b uuid:71d9978c-29ba-844b-a3d0-e3f8298f110b proof:pdf saved xmp.iid:0180117407206811AE56EFAE3BDF0A74 2009-09-08T17:05:10+02:00 Adobe Illustrator CS4 / Adobe PDF library 9.00 % &&end XMP packet 
marker&& [{ai_metadata_stream_123} <> /PUT AI11_PDFMark5 [/Document 1 dict begin /Metadata {ai_metadata_stream_123} def currentdict end /BDC AI11_PDFMark5 %ADOEndClientInjection: PageSetup End "AI11EPS" %%EndPageSetup 1 -1 scale 0 -226.771 translate pgsv [1 0 0 1 0 0 ]ct gsave np gsave 0 0 mo 0 226.771 li 226.772 226.771 li 226.772 0 li cp clp [1 0 0 1 0 0 ]ct 204.094 0 mo 216.61 0 226.725 10.1577 226.725 22.6885 cv 226.772 204.084 li 226.772 216.614 216.579 226.771 204.063 226.771 cv 22.6782 226.771 li 10.1621 226.771 0 216.614 0 204.084 cv .0161133 22.6885 li .0161133 10.1577 10.146 0 22.6621 0 cv 204.094 0 li cp 48.0801 134.498 mo 48.0801 54.0361 li 72.293 54.0361 li 72.293 134.498 li 48.0801 134.498 li cp 17.4346 22.7803 mo 102.126 22.7803 li 102.126 46.9937 li 17.4346 46.9937 li 17.4346 22.7803 li cp 68.3276 164.095 mo 60.1997 164.095 li 53.9785 189.263 li 46.6914 164.095 li 40.8623 164.095 li 33.5752 189.263 li 27.4097 164.095 li 19.2822 164.095 li 29.9321 204.004 li 36.4336 204.004 li 43.7769 179.789 li 51.1201 204.004 li 57.6221 204.004 li 68.3276 164.095 li cp 203.706 203.558 mo 203.706 163.648 li 195.914 163.648 li 195.914 188.087 li 180.107 163.648 li 173.157 163.648 li 173.157 203.558 li 180.948 203.558 li 180.948 179.063 li 196.755 203.558 li 203.706 203.558 li cp 147.888 203.558 mo 147.888 196.607 li 129.391 196.607 li 129.391 186.91 li 145.142 186.91 li 145.142 179.959 li 129.391 179.959 li 129.391 170.6 li 147.888 170.6 li 147.888 163.648 li 121.599 163.648 li 121.599 203.558 li 147.888 203.558 li cp 94.9116 203.558 mo 94.9116 163.648 li 87.1196 163.648 li 87.1196 203.558 li 94.9116 203.558 li cp 154.854 135.14 mo 153.573 135.032 li 130.874 132.662 113.321 113.481 113.321 90.1538 cv 113.321 22.7803 li 137.861 22.7803 li 137.821 89.5562 li 137.821 99.3218 144.612 108.104 153.724 110.417 cv 154.125 110.513 154.437 110.577 154.846 110.649 cv 154.854 135.14 li cp 162.202 110.614 mo 162.57 110.547 162.843 110.503 163.203 110.417 cv 172.317 108.105 
179.109 99.3223 179.109 89.5562 cv 178.9 22.7803 li 203.597 22.7803 li 203.597 89.9072 li 203.62 113.235 185.905 132.635 163.203 135.033 cv 162.207 135.136 li 162.202 110.614 li cp false sop /0 << /Name (100c 38m 0y 15k) /0 [/DeviceCMYK] /CSA add_res /CSA /0 get_csa_by_name /MappedCSA /0 /CSA get_res /TintMethod /Subtractive /TintProc null /NComponents 4 /Components [ 1 .38 0 .15 ] >> /CSD add_res 1 /0 /CSD get_res sepcs 1 sep ef %ADOBeginClientInjection: EndPageContent "AI11EPS" userdict /annotatepage 2 copy known {get exec}{pop pop} ifelse %ADOEndClientInjection: EndPageContent "AI11EPS" grestore grestore pgrs %%PageTrailer %ADOBeginClientInjection: PageTrailer Start "AI11EPS" [/EMC AI11_PDFMark5 [/NamespacePop AI11_PDFMark5 %ADOEndClientInjection: PageTrailer Start "AI11EPS" [ [/CSA [/0 ]] [/CSD [/0 ]] ] del_res Adobe_AGM_Image/pt gx Adobe_CoolType_Core/pt get exec Adobe_AGM_Core/pt gx currentdict Adobe_AGM_Utils eq {end} if %%Trailer Adobe_AGM_Image/dt get exec Adobe_CoolType_Core/dt get exec Adobe_AGM_Core/dt get exec %%EOF %AI9_PrintingDataEnd userdict /AI9_read_buffer 256 string put userdict begin /ai9_skip_data { mark { currentfile AI9_read_buffer { readline } stopped { } { not { exit } if (%AI9_PrivateDataEnd) eq { exit } if } ifelse } loop cleartomark } def end userdict /ai9_skip_data get exec %AI9_PrivateDataBegin %!PS-Adobe-3.0 EPSF-3.0 %%Creator: Adobe Illustrator(R) 14.0 %%AI8_CreatorVersion: 14.0.0 %%For: (iBook) () %%Title: (TU_Signet_CMYK.eps) %%CreationDate: 08.09.09 17:05 %%Canvassize: 16383 %AI9_DataStream %Gb"07l$DX+Z7($+J9N\=80E#_kt+M]+:1V184P^M^e#@S+LW//NVR'Kc$b?a+MS2G>FH\9IcNaO&K?ViLMNIMF@aJ2oGW'\BH?&>#m7:kE]0,F,!DbndGp*_e$'E=N:]^;?&lYg_+.#"N-HnioDtkNCTG_i"LVA$QA-U4Hh4)eT\L^ %pM;B/+sM&rk8:ppfDhnZ!0fih\NIbEQorPHR@NurUq$2H./m4W/qj4CU.jZ%Q&)PN?R:1n;VK%'!Bmu8RUt$!eMfGYYpeF&L5JU6 %dV2#GoO9ima$k_A^9=b_Doi=',`.?Eo::W_<.+O-HBXq %3JPk[IA@3a-Ct8qdsL6LIoYtL(^-]0Oip$#D?6'5P(oUH8J0/kQfIX^32&IT(=F\ae>;->A==LajOJb,TFuOnrsJW&<9sW/'LeO* 
%j8rRk\M3!HCuV/uqELAV.ar;q2h4IeAJ$$d=Cs\o<8SS.d2VTNI&;o.rS>/WK%=*HB[g+`cLXL/ktN8e&fWe&2"Eh4d!:? %i08^H=1qsM?\pJq9;2CY[lE`(9uIA5;g[Dn%i$fB$tg.r1b^gZACQt(A+Z\u?gk2!iWiTFk3)$Y`XAE]NQ:$(r\TrFjmV[&?p4d5 %:FF3d`GWk&d>ROt]dg^R9"KN!RcMrWXGOJ=72A"m(XkKC9#MJUV>(3JQUb:HfSL!+DqQ]j\DI %ODQ8?/C!90k&rd+)dTg`inm+34<%%+EU;mUq[@lV5PboUW.dF&>YS2'/Tu==jJjW=p'(ZBIq2?J285D4crU06qPX\[naKHiAm`Hi %>;ta!qi@H7g7.rP+0PM4h4'F[o!c_WQ7"7M;MP.Tf_u!8l'hebdcFsu)#sk1162#:a;P1bq5U,!g'6>+(W]^Z %'TL/Timmc."n`>SX3.%hYXFZ;PNcEfoES;'KV.!+&8QkSG`=E!9As)c@Y&':#aE$$[pb]*!39hA&bpJ^co'U(5hE4koCrRgE<^mi %!g!=id`_mdr5pbsnFZ5t!;5:1+HT"^NS+[n8K`Z%Ih@EC81&eWK5:R6$!%p1U=BY09H&_M**=n:>5`u,i<3Kb/$N\^JIbsr"$7M0 %5I>>=A]7..L!cY2bKM`^WioB&L1(!&m4eV%G_cC'+feg^"M@h?,:/%A3g`a4Bml0V7TIiH#mcD?G/S"hI\9^NL)Uj23(FfRUd?FWOXo[-[R+?P`^/rub;?Dcik,a'\k_g@uar#4d#ks.:p%Gs7_=ja^L+]FfiPI]Qo9nFMQMFVue2=Wsl,Mt\.@ %?c.Hc75HEqiBKFQbt%4WW'<*@qX[Jn7R).e(sKan`6(;-(a)Gndj\]B]r;)"JYr49W6T1I;([7g+/&LnctcG?;I;iT8H63j#:f#2 %7c@QNKE8<2VBNkBdK!`q+j+09N<&pl_nQAbPSs'08s:EGRa:#VjoHs+8D@1n2o@)qL<%hLR=S4t1#X^G'hOe61BC0brsunQ3+1h+h]a?^ %-CI#0`%j\a^D%Q^#=47r%[><_Z@,%>[ch5N+iW_caiNo19Ke7 %kE#-(`DBFOU*MLjq8`9Fs-Gj?HpZ0Eh.5.C*34.c)n_Vj4$lPJbcSTXp?SLTe;tZFP%R^1"m#;9*6a-D#Q"kgU$R$r/]]/r?a;`) %b_3gJ=C@t(0IC@G0[A\C.53Pd#4Rt7ht+*\1)#G68NW3rq/]+?Kb>6hDggiH%LDAqBGMZ)UAn_*rC<`HeUQ`1]`'\g5LUR@;FDId %d3%^A6T(@UV[R[,ds5Bm>EITWA%"*4O;?m86tBgsRZ;JA`j$:3ZF6&1+lg>V^&iPj-]EaNT<\P&E%@@Vqr?WDckpY9rrlHH-^Y;O %T.lU,!4#a6q7HA3GGm8l(BrS'4S66Q,[$F/+Zs/ %V)BP0bW^3X\bG%LA6-J9VR$L@l:WaDg6B_49M274/A##)[MalC_L/GeY=udb901g1p/!GB#1L`+IgfD0Sb^7[9&d&[-4k8US.%hX %=]>r-.&Nu]\olMHi.GrY7E:BYW"IJQ)TlUGISG04g_KpNV8P=fRY7%b85,l+";,p]c#8M:F`/T#Ks@Sonne&iJ2q@$)u'Bjq*f/L %T?&:@Ji3ri.Wi;LPrCqB7gU')!ID*R-Dk\W@,(=)?GiQ90i8R9Z#**=gXN-F_'Qn%)lhacB&,Y5_Kj0=D> %P"d)QFh#/$[ll1aldU]E[jA- %:[gUL2#I@EoP5rTXYC*Y(#!5$@:;h:JE'b!H(,jY4+b"n<3J94Ve's#8Cc7($(:BVo*q'DAZ-FcOPnqh<7hm9C1u,O9ga-SB4-Q) %-Q`IkKUT`j,hV"#9U%Rr\AdnCP"["$C".\+A<@E;N=@$sd8mPU]j@hCJg-+iN^Djp=Aq%7Ybl)L_4%++'l-ltbVmJO/:n=\)?*30 
%a/ZC(P179_Eq'@M6gFC0OX;]20P0ctOXlUpin:IO)._BNR'1+e2NT4F`fiGbd!`I8jD;-\dpn!UrUA?,3hg\)u %0k`J8.USq/VpTp]d+M=FRko#]lu^21H[^Qf'_s@dSH:@FeZD,>D]]QWCQ6@[b7V,aZ3oqq57&YGCaP3t1SB+B"B**K'%k5qZ3\jA %a8gA\F>#!>-<^AcJi+#YBLrQBLWhr]B@@o\5'EPrFXe@eb$t<3eI5?q/A)3IZm6lBED$irD1Jo3RW%K.DEYW^g4hALgEb&e,G[jr>&+LS_c!Nr?o9?))Gkm=S4WZ]N,^*THsXo %Db\8>h=9I(HZ\n`O;V:*pk($oCFY=4VNmcne[:Ws@3hXGk#V^XBrH!Pa=ae$f#ON\MO>pqN!cuYY5G#D?%3;%\n@CVjhkYFlf(E9%&,OA\g5"sk;uM*KeZ@Wa,c'87rN`b-[TNHT``#jP$shU2-GD"+#0V1>$17YY %ju"HChl'c%5G!`Q[FQ'I&MGts>As\On![33>i-)LSE#6?o9Wj+l%1<1G'(a@'p#>$tcq'4`U6+'1@V(Yk+8:i$I;EX//i#`q,9(`\IYmM-$T" %FK1m>mR?@nE<:5%dg!TR#I;F@=@'tEnD8E5W,UhNJ,d8;96E5!G/?oXl8.)k2Zr,%i;T-$rKN!(Drr_Bp"p(QS7#)>c^,P[fA$.kWKK0TY(>7_^R(+RIS7g.sh]=o*F:D^_fIa"ULMo0lOl#0(X;2XS"ZiRf1GuaY %W,$U]]*4h*`cf>R3A'T&gVRn!@Lgro+A^+np"ip.YF8Ujp1-XKTUOpI\%_^N!(,K;"R^h6mE4[-5#W1>Gp(W#@HK]h4hC).fjs_! %)Empp;XbKphS1\?N#1*c0D&^,F`a[KWum&lW3LVh+B1SXcCscoNum(e^OhSIiuf17mb@q3_,8#m[MsXp= %iisD\ZPP`n4QqGqpR,D8Eh/MAq#V[?*Mnh"h@EQHHn(b:b'lSBa5:[]gd?r>QuCYI=0)NkP&trq/&c+eB?00=q(U^YIn8(d:[.`i %l/dc(51M^'[=cmnTA(sZ-<`(%Xm9XH^3]tYCM%S(<)e^!h;q/Q4c[9=T0A[WU73KH/g:SS!Z?ooONuqn]K^F!8Z=dh_\@ %hVa=(AuC3Afr_n#]80rXc$Y5!S^*(=Vs2JO>@Vk1>W^c?%C1Q3^r`eXfd5uCZ8enAkOcSZ,ml\-:D[-tD81hm1c)l":s;[/:0+tAUpoa<4f6\35agcI3#g"$F+#u:6 %][!6hSXj*R8jdEma.mOKK^&+h_R>AsN&lYTjrj/=r8i;K`Y@$"gYJVfCo^r^`1PCH2Fp,aLcVAhrLZe %:@IJiqA=>n#e>oeS@tPkN\R(G7IK%ShFk#B4rj6=h)Y]p)0Xd%QBVU_JR?jlld]jj^!k0*cX2a`rE\#M?^8Pel4iNM`Nam$U>(a= %:2=\?qIe^!e#VMboN[P0N_VknW8!MCnLTF1q*sl4P&74i$6)Un^1`P&q877lFrFXg1fp/nOgk.1(Tbcqp4;9Elhg+pQIb]]cYkd36lFt*r^,$Ln`l\'\^NOs4EHQRC!3kB)b;5^W7Ff_(YEi/TdJt91T"MQF'D`fN$Vk?t;5rqZ8hW-:-9's7*/a%[YmbB[, %c.Aq@Zd^Dg)[O6#LPN]`g2gMZlK0XX\_a-=Ih+8N'Z)io'\;[b5>Ac#ctJp3%YAY$N%ba'3'I)iBgA%\Ie0?_;3@O09U" %Xtaj[^%9^Z:"fL43Vs^#25ARC*:lAK]f\KKmpP"DJ$b*DqmbtHr8eTiCJm@Y6EqlLA^"_5nSNIH:3\r_`4Xn:hcKLnNZE$IMD`Sh %%pRJdN;:$slW1=#0CEa39!r"2O]99tmH"f(aM6g(8JLdqn^htW:@ad]h`oTb8Be]\mC]m52aUdJeqIS:]?^\*07MaU\_#r:b?Nqf 
%5M1eRC,h*-O-__8j%=FnqLa4pgA,m&iVGJ)E':]A#;;.nhFc3^pGrjX*FFT>n9nd94^UCd\h8KM[gPMn.WrMfi9O.gp$M[j(u>_h %#N*q`"c@m*#Edj&LL%aoqJ[5'T,B`))%oi&KuV[s,e_jMH`LUlc#R]+1[E=1\9ks1"_6)PLLBKJ%I>UWKA0f/3AQB)>&aLE$-(<' %^_V59jj_Td&#LS%:&>e#BD.k.aA6upG4]d72EPT=oW`=V]9sW[oHM)fgGf5gSYTP/8fR]R5qB*TG.+8K'n!NZK&Zgs>M\Z<[VeQ8 %]l39r*I5JF\l!g4@ln7;jm3_Ze*JF6,u%9a?*da>Vk#b5VtFF%heBbVFc9'od(6H'Rr<-BAEPUlVtI*?Lf*/H5^lB\M40,KnNF %[JcjZd=uH3/ca&\=XFhLs$12#D)=)nY)ErlUPHN^_a6Trhk;7N+'L=BUgs^.%[<-oc!h+[UXE*6f,uIgX+82t0=G)" %cFo'prjf<%a^)o1`18jC`YSn`7u7-V*ij07B[0uMkl1:mC]7cZ)Sa)d&EL+8&HZeCG0.[sT>,]!!lj5X=9N%+G\.J>.+m?$(AkB. %o17&--=7W)ZZ=W]'=N'&^*o&;Hd_!9PptUSa`J&TLuf12S_1k0H;7cr1M[68kj>)l&Fu?N[hJ2jRd_.*7`(\.<"h.r %@k9tdT1(/Bl&J5m6.'YrKiJQ6@S_&i=FWH6l%)pKI^F[ob\Lm*O]d0MOpZ'1RO %O#C(p`18K+W=??$KU.dL6K(.t'LY]4(Z#]]/iYD#,ep5(!s`.gPD_XBbkN9or)>Xs+(VGmG-SoS;dqg]3ZR6oN[Y!9hap4=9.P>R %clQuie1cd%b@\-Oih11UWYVuV#l1"J;1@rjH?*Bi!;Tr_9Ir0;UaJcg%>(`>nY%Z$5_!.130]H2[8/l:=HcJnZJG"YrrGY%oS7%C^I;DdoIp,XDo*Z8'X&Q]]IIdEs1mE*m'9uFqeM]m@!U-nJjX(se%Uu/ %*c75HM3V4p=>R#!GX#"E$QR;DmVGY'lh:$)SN.KGc9jqh,lg#PMi8+rIUDT+Upc0-;;ei2VJsYKRJ+8ND[+dpP,2@;%E32l4ikVR2ohJImMiXj'0% %H?2[3l1UL%&,o8Fb7_82DK]o!K.6\(ba0M)?E[eNS3(lO&q"#u%\gGtH0)?gkg\!c-q,k`!1D<^*D16!!AHS?]f>Z?EeGSt$Nl@AT@srZ/bY;]S^<\T^jXH %I,8i/)bD.(QtPCacH7XK6$[JHln(!G%ih3fca)GP!G4P>oVFrmkCZ;M[jTHQ-%*/('*`Q=lqfZ74'j&4\3hA9)'@.[GeBjuV8`?@ %K\Z:aL.'d=O_At<,oY&r!nP@0,api9SlXTg`KPLA3;Abk29B-"'+cji?9aT*N+p!6]/G%=`P/f9l,sUcKraV9i"!ptJJiLP %>8mWO1ttO%0!t!s!3h`:/ep'-in!Bum(pG0b>jH`XCC)ATP1oU,q\iKB&C3d$%tG":h`;&0%;2+Y]7L_U(&-S&;GkY9O4UiZ4Qo= %<&bL1rM,i71j,WPGfgH4^tJhiY;bfke4*';.`i_>\68=*-n-]Q>p]Y]<""l+dKQ:!?=!f0XL1uC&u(pg %aeK*"PkPZcF/%a086&E^=C)KVP]0Jm*NWG]qNJ1AW@&?;Bpct!r-lE@N2=RWP%&E^STssM7,Am1L:gLdW#qpPZ-8-0di/QdTk7b0 %MUD3_E@1k^R=q)H$bmth_jD:+7$8+/$TJ2WW+nGWUd0gu(+(Rb6R-A1+Oa^pK>LpjZeiq>^=gX4@pd")0JfjLN9B5,[g]i5BK\@t %j:&VRN1hP=kf=W`lL&ff*'/*E*@Y4>:C\C*K?g&?\9nc,3J/pq>#,VK3J5V>/\#m4*BAa)&6_j>*BA`^7H'4]tjR6\eW^/2[lH<]-e=5`d<02.K]A[2)4N3$l5HUNP8H[5^`>(0r&nRm6O*IOZN001[s 
%fdoU]m8>9`74F`Tp+nl6Yh7+Y6PcbIjrF?p&mjmXRcodL*,h)2R?&D,0EOk).OLLGH %SB\%*%B5Chetn %\P)'0V9BQ1*m#Q8Q;);5G),%nK\!A!VS5r%[6-Y,'LqQ)IEh.1gL?4(?"`#S_s!haA5IJC+Bn3OC6Zadp:FmeTi_J7*n[ec2CJF` %nWC"-TT6>te?KV*esIu:Bn]>W4&h6c0IRN,Bf?Pb5RJS8DbM2%r"ZZQ@c2[qnP.)K%;bm^j7CKO_)$]qLK=T/Sud^Rb``QbAr7[< %G-/%o`EkZM/\WWf576pD855g19/4'=2^^0[3[oii_]E$&"7b(69OQA93nA4@aHps?$QK.Fi*\>_D`RJ4O4C?PD,G+Y!]6C["kPA8 %Z\Q(9JhM=+[T!oc&cA4*TAo"Hp:\S!5@S`=4dHqNHPO376*a`6j==#bgJ6u!8H)WW-1#g9WE3-% %RP@4Sp!GRD-@X_c$X4q5;BdI/\O[`g7[2X9/7jkCPX;`p$c=%'dRabo/m!(^h+k`f.9\bm:N>_PTP)nSZ%,nf*#9hj4u,fEoAu)X %TTo#F`CLkiF<2='k]D]X3!)^t]/u%_1YL`L8So2S*d]pdO3A2[^9N!Kg)5T7`]3#MfJ\,LXl>JJf,ESmG?,fSMatdGm\!?CoC%f] %aVHH*//o^JSZ@I'eE-8mr-2a+IXXg)Y=O#_j;^FS//9P]77eKF(D=uNf0Sl8E>i>4G.s!,`1oDS*&0^bd87MqkNd-QTV8nbcIt-2 %9cD_O`.*l=6bgjjWBqC`e&NH%$b1"=YIloGZ&GgtWopbZHK;_";o]J+KtRRV:i[TUOt%t0dtR(r %LK!5A4P\pHkJJlms-Wh63;4ibFm$ZE'VS-i&*FOu\u'j7,a2;9JR8.kXuCrAirbXmLN>pPl,S;>c%gANG8(*_F$:S_/HKY.LU53_ %25G08q/@s51!Hm4oV=$b5j-3)X-"9U3[W9p`A$/WUksq5LltkXmGG%;?4V`$AL+'!*)^<$oa*:)j4HKk_8`_"&=caAlr>Hlqe#R> %G]/F^7s'PIj.3Tc:L&W,7sQeBNLYf7>D,K=+88I4[:D[2.=p"YH6(X$IcXtu(j6dg]QBH1-ZrPOJDP'DL7R"$D?E@I8B[*4pPRn1 %YRIX1^`m%RXrA)cr]K$82D&a42$@O+@`I^l1b5MYgD]#VUCJ@]I-,-8P;^pVN7Hs;X>(q7!*`f"lH@Kl1/bZBbW7LWKoM!- %)%'Y=.Y+jggpH.#;;E_]M2gipa_sWurADsQ/[9Y-+'Ke?92kcn)8R87MW*4)1+\/C-A'OHV5;%;jk*a8iLi?r):;q9j:\G2\9V4q %.bmXF9BoJ9J,@B";rJ^?&9Xh!9.HI4=(1KqE&dk7jo,\P:E!*g\3HCCpgansn96+idiRhVm6W[tE2L7.a`e7n305:;qaKQrfJ#JA %I`\h/_ZbBFH6n]L3l3%n0#0$Q"Ws;C(Q.9c#>14#@I]AhW!CQLFKJT'F(K"?oKrd;F!&^M\a!,\^3uMNb0:Uam@\fF>9.^7:o0[E %H`1h>BC4t][!d9TI?/!VYBgS`go8dE?YT[lkA[2rg4[Q'C[ie0]-P!o*a*Qh@)$qQfsX96TWaP23RPpeL/,RnmXo$ha9]=2PCHfBE`q]U.gUfM6u;4#sjPfk5G#]s'd%V,ajkmiEaRg8-:#UReu&S1WFgh-u3B %@^J*p:/'1E`nX-I%8h25[,[lW/#8PZc]DA/"UiD?M:g.1WY>J/FK%D.Na"AA.JcKI8phfE$BJ0*,G(,>k@*^`f2bJgXcAq;SLQ-i %P4Rpl>Ia?]qsrR[?0M^CH$O`7B@AHuL[g^* %kI(mT]l<;[VK9qWF2&f/Q#&K?T:jAR=YZDk/XL@/bXWbqf[ralV6fD+A9$a4Y:5sd%@$#b+hk;Igc"hI)VInES_H/a79?W@S'EUP 
%c>bVFL28n05E#JH\92?2\bXt(hr0Zg$6F"P0/=kB:%7&-K[f2P"&)STFP:H8_f[>&KY[p1s\4hfH*J4JRuKJn@@qI6'<-3e%5=O9?XQeEF$M,2XL6FAGs?*DN_-),Oe<4t>eF[8MF %ccW[S-8ha_bS*4;0J)EiTW4@c$J;FmGGH!OU(RW/e`D2KP8d,/FN5h!5!:Kgg]Aoq&r&Y!O`pEO1:ca5,+#Y!mk_RJE!`itGigP\ %$t&5D8'[QMR61:/=*,e"s"\eC$$/RU5"o2CNf_@OQUjVh-TF+M=Y"U'idf>:J7>Q+[nr="U[]b(q(MJpodCF[h9k:5g+R1I:F"_qqY)Yo,a:%Q<9lSJhn=F]tS0Yp,\tBg6Bu]*eDSnS[:O12:U,`bIb)4qU+/"\Sb]e)];dkg=hLSUWRGSba'V"XVN03D'ue@Vb#p+>B-A>:![/pHpcRMh[b'q6 %,/_]eB4_M410gfqSXEjL?K5q&A[kRqDY^loIOa+bXDq5"DB5.+e``GOj`rC!l:Q>;a\0M$\FVj5f;lS$nrdYn:&;#3k>C\*[ckX" %Y&HmXX#USd=\E4j(9W5G^!\:Z$6*93>hB%)@]cl^F*/?sCdN!,O)%:fIJIm+DYJM(PmYR82p:TK#QSKHM7!nP*p?#KMg<4Vh[='+ %Xg-XfM+qK+n1ggq?eY3acGe:#5A/7UI<.$Bt/P%1\,f4(=c9FD_HFPCu^]O3\dO"1HF=]XW/Q(+3hM5`!cp?p,m %.q,/"D%`%0;g;/Y!K,ao"g]l":sDO*d&&Np)2=DEggM&:?7H6l>s`6BG-H]EMS&AIj'$p0_VWf#H=sp;6T[1Nb[XjFHX`aJbZU]\ %8I%!Rjm&@@`eIWPhs`a5+P%<5^"d*n7OjHKle,L:FbhtM3O55J3NhN0AWsN>goWe(btP30dLd9n;tOsgEh,IoqGce9\WSmc"CCVtMYr8M3JDMsnMckUui/e_J('rb[=nWG0fYST@NB!u+**hV\4=e=:TUiAmq(fgkm4b5D/!D*D'7 %mbd[1(>J(s;u@Ql3YgI3ea's20o*npo6>*4JW)JUDhF("86NI)5Ot_e%ZgZag&)0j;[FY5(Bi75j7p9Xh9hdOP %/k2bQE,5(86L?.pTD7&5gQB?"U[%5eN[eYq;qh"$P&>`Og&K<]J[<+gCo"P(24X5fTtRPq++$(P_nD%;cJ=M^IGSbQUhA?u4*n2m95%`D+Dni[kH4si_aeFg %pr=@?,<^M@baK8)RcH3P8sgYa?e6?e_Au>7%'F#Kk1%i_RdtSZ@U7nF]5huG+c;E)b+P4IC]OZ]mopTup'>gh5,]-ZUVB4&]]*3Q %KfY@_c$@Wd3P3"HXLsKNRl=A>\R[;d(=Zcn=cm;`Plm>iAT^=.2?XT#K!kI=,nUE1_+5rKVmZ.IaWUf+NUZ %(B=%p1!f\[oRl5V8[6c+.@.sT>=]Q:Zjt@RZ:l#VD%<3pU3h_"0oE*2I]EY_KP0!n<=U.ArT1?cSb%W[!OP7n/s-<.cJ_kb(L&X& %d(q3a*TgfN/8g2CWMl^mCtU@)gQW7i!#K[WYiJo_U7PqW]L3MX*[SG%`,T+_ij1H?<^8jgA7'b']lZH'd-E!oEtl&X#m?2?qaS8FkdE0lh[_0PA>#Z6`jD@cTX0u %@abt;[]XcOaBg$V*35q$qDl(6;:TU<]:ka;rQ&:glqZ>K>sHOABiY+NG=22.lUSq9/(aG=7tt,.="DhoUWDM"6X;o6P7-R!2>#(L %FoBBK>HD2B>[9R7sEAJ)5cO0H_r(9;AM\pe0YLJE-G:tLb5e^*Cf<7nd2jK1$] %$JK]05I[R?Z`;p*p%[+"o1iZENsl3:rYk(uOUkDB&`"tfockj0\n%Th[lXrgf6"WY*h %ft)X8k;7o"U$e(EYNn[g\@-%G/mm5VQG9h(eSjEsIoEY`F*X/)9,mYK++uhLg/T5QBCVmWJKL 
%8&Y19>CUeoaHjK-o=fNV5B9c=ojjnE`Ff+&a/J:4Q?mqMHYJAl)]%c9q'j;GO>*`c %oijtqYZ:?P158i2lY!GgCK8,\ZI8Z?3ER?o=kA^Fk.LVBrVg!cqQ2"2e[qt+?/FnIFo;^sPdSo*X6\q%k/sO'ht"nY7TDZWI:VZC %P4+ng;H5ru/SDf_qI/HPAd0f"YpJ`4OTV-iHqI&_ou!WHV7e9HTCZ"je#gcXlh^G3="[_/cR-.;Wqrh14_Ut`8*5U-V3BN3Xrl=N %1enZbXQrAOLc%'\C3BKr[#^]l.oXU/ObPE[D%JgX?oYE\&6CZ(KlhgQm9_gZn>rK%VOXK$jeo'WA[pg$O_@Jf/"5n8i+]N$ig%&ufJO95i=E-&Is] %_;.oO3Z/GC[o3eRR8P@NQ+p(Xe%)^3&G(V3UZ\bWNSpcK$'ok/%@;KVo]L^osGT]CA.+kL$1u#n7V(!$.jW>;O+&Q %@Ontc>/(@6jfJ#<^Z1ZB+-!S-qr0!l7;-]Q>*U_!1Z-+1.GYlSj4T%3r*onuHiKOIBg)GBZTBf,?kLh1ePuj]\ %5gncujd.%29(f/aC=Wke#CWo]Ufp9-Vu1Vr:961:DL^JZ!BXFFSn'L,&!uHV\$;qXS\HP\G.0i"Xf_NX\/]7klXrC1qfEtF3X)ZJ1[KQH)#O$%na`@^r\9]o*2.Z/'5Ms.b<_Q*ZEd\X3dB`r2jOOBVtL.dbuU`'**tIjjPRfje)G1Up=2V27!1] %BH_Fq"2@T+N^)sa1.o/1orm8BHFn22NKc)?U*o637]._konP7!'ol=)Tm86F3'H@uhMV2]:h;%#3B0$_PQ'dD^a:-nEPC0E`7UmT %'8JVt$V!P&iR^>'*_])'UUuKg!isNju\m,ZuIZ"nbb$2'5n0QPh:lAcf#Y7fD&T*HIX$eFkm^30:K^UT5qO`L-WL1!-%U;hbGg:[ENkSp_b(-(tiObM,ERS(Y+Z7o_O %RqlQ(OM*8l'mB$tAAGp>W_Idq%c5WR*Ok7!/\ioG=KdaVG>k %ga;Z"fZ7*bji?c88ZWJK45OK9\#_-[Vdl_g9ZQSOZ0MO#P7F7:omgU*@(G?q]H=iAPHosfl,uumFV%UJ@V%aoe.-5`gj/$Thp\H- %3e)C!2]7U9bQfA`#S/Y2lM,,S!8<):.\H=_Vc->r!7h'K#5nUn>TM_lg=,lh)d;t58OVcW^\H+`"HcPIFJ"R:`2GHs4Dj3ko!uVZKb\n8M+)V#IBL\i#)%#'KF*HJhD*!"]"n?IXGI<1m)`kn39tb5X^Nke]kr. 
%UfQ,as$RZerHHW8\D:&3#C3I1,Pkk_:"?oh&2R1+r8ksXR8&]"'$9*%CE$@Zh[7P]'='NQRD]SKdYT1Z$'I!@[\kQ_3&$pkAA^65 %#)Q[%2tq8/2Gcg'0anUb+:f#g\Bmgu87LAJ%W':)TD13H9Wsn^^OKgH\6]:m[t/4/2b^OjYqE<^1nK\4npo*t!KCTK %.#bN_3D:8cPNXIn"W/iB"uEB7-'\W6s#sR6YE'Q,3:X0/X)J'Xp;I0J#ktcF?@YJ,XofW_ETI6Y=g\:0jcXbE$XUN>)dAF7I'KZ= %@cP'nRut>]Xj'L!EZR)UU<#/t'-"^SOEgr1U,Z>KMaa+t@=5Sc;Cn*F)JPNBn8f(_\maI&!UY[P*?3am#J1`q4A2ZF=[Ljg6DX0/ %GXFkqZ*BH7rF:rIbt8':o,;YT7YE->&KE`ZJ;*;3_?($srC`=WWTQ"&\(42Mj0O__D&',Y6nh/n0R<[Q3)-!C,`p0>B9NILON@fe %I"9)^&7a%=-4tiDq0C^;_'=LAe6BA"'R6NJ3q@l4,pEU/,+q\a#6=^WO9H3i;d'_L!1fOg*`1C-h^\b7f&d2B0U;kR?"q8doOQ"J %[bduM2[iRRO+Y\mI9?MQQ)Vbh2ZSC?4c%\f"M;7eCKm/'LTSjD8J!q5QeAAOVU`Wk.6ejZVeH%U>cO]%FcdtuZTmZn$p;<;G* %"7`c%;qV/0k/b,9"iN7]9o"b?p2M4N%37k"gB>>N/inDr]?Z>N3?8L1D!eWMA*:*l(XF:)U[%L@;THXa6kCPs60S(`eOKT0?kH0S %7NMu4l5(Y#aaCMXg6$skO"EGPq$%!4Frk^4n[,jkK`a!CQiM_"pKqbfJ6m/+dE"sXU*oreih60#(U3[aB-i[:GI74%8TVY93kCrK2.,3K3c(Jbapu;I,'j:Ab@,DBI"a\5qDZGLo*)mMe3)5fe.S&`YOf55rC)o-?^TZe %?Ksu?#)nptK4i5%PBPNKla=7u!YFi2n7')!eD(3".i.RIUT(A)#ctH1*=j=^U9c,ATRgcVU9IK'r0%5K7-c\4mlKXDg;^;G9V<`* %K1E_XXW&%Eo,RO]FoEY5T,G"V1j@C3/cdK1F3'o*?D*j6#s#BQ@.@5,1jhcEl1""i%doAL(SBsG$%DuBn5Ddq %b9OWUp0I^5kNY:OF&i(?_a#i@@hT_,T?\7gJ!)b^Yp`mL_p@:pM$MZ)d) %AC+=#RJ33pc(c1dg*0/JcM.$?$+P$KUJN$i2K'(Yl3H'([&cEdQQiEK!Bg5]L0H?X38+m4/tWKef[g\IHN-M=U[qqV(qY$o#35%R %cR'/`Xs`G!7JW2HIbKjtf;?*6'K^9(#se[Pn=Wj?bq@>&cO9;P^9R^"W(io4pF"D4'8s#&7E9"2#VM_?2@J0t9Q`V4Y;K^rK+\$- %%*%_m:*B+e?p#Os#'XuYfjKcB#UL9Ut1d`t`@8d.ai$D[Q'Zjeg8/Nag>fFq4lui_b`Qt5K %&VJJN\d9\L4d<.pi9npg"I3n#5MlNXHa''rEZ81"QrNb@7RA%;O?7Cf>egZjBa+qV3(Q4VgV>o=_$o!G>>pD"_;q\mj;cT9>:4>,dKtpR\";OMoB2E6TZ1J- %-#>P$jOOBgoPQgSDNsclOb;o$e^(CiVkUW3innkkUm$@k8HajqaE!=P2#9?FbDqITM,&ZYOIZMoo;F2OU,Xl4,f5,A.trA\EE$a'?AE*fo0OiuGB>=/,\Vql:,-'g'5_CUBN'*X2r]>oj5Q.cdb">`#FDhMt_o!-EH\3ld9JI"a"M %2g0&U%m(ALU#%/-tPq(-pYpES:M;D='EFe@F49DC)/H#*0&j;9UtT=bZMoV0->P6;A*-VW&-m %H)'4E7u]hL0[Se!d]QpUM.l9k^GrP?%%MUP[VKdI;XbN;<;!jWNRY6(fE-l<6u4/5YCEG03fbl%ih::HRR&bOF+S(A*@Fo,7O)6G 
%80&)!d1<"7D0!@0:r>P7=<-/d*ii!r9QC>n7TrC=m99S_hEju:UMFCD9(YIh6_7lW=\Bf>hXK>`q85ak/s %+h5X*";csjBZ#O/5-UAIDHDhL %(,ep#BOe)tB9s^4-+'Gt5HH/RQU&G,1C_iJf5N*-X=jp<)Vg9"Frn'*C.=2ul#UXu1scH$nKM[k2k,BWS2Jg-qu^5SQb0]QD]nq9 %W:7utou1Rt#_'K]TZmr\%r+Kb>j%1l00+/.%2Kr]o@agq%P/p1"<_Qb@p6,0^:N?/)oV()Ja#2@.'5)+rq_lZ%"q7? %q:>&rC!_H%)Rl6Y^K-t?k:NO:f594[O+!6K=QL^/"_D%BTQk]Pl>6L9U,8iN\9!&UF@X[[8gBeA?Rb#:=p8oOpC.5pT/;A[5+;6o %_oM\n)S\8tm.9]DS)NLV8m)^RYE$&u,F$TZN&h_;4"3PUY;3dk-ro9ajKFFZhC>o+s%WJBdp&XiT`'68o>(t\pQ %H-1-_PKCUdHkZj?^lu4*b\+.GVHXg#LQeV)8!F&p"94u^rt#.!.khs8BD>.)5%*@0AhO^5$n$o\I7j+Q+MosdI-i7RP#<=oQ/Qg/ %8:R!NI.V[2]K<5N7E425NsPoqfIlq#]T0eFX"HZF)do'Hptt+=JS&%R%%rI(MT7H-Eo)V45NM?R1))*p0_nomIYne2N/\@[k(isa %;88_q"$\WN[J*.4G90;+^.K%5o*XEo-;o,F;M*(G@=Ue=8`d])p(,&P7M5.L?NJa9N5n0@O#-:c(O5QgP=joBW>rr`0[HpR1B>33 %DZW%f8apP^@o@\$O^m&ao8T>QEp9+j/=Xgg7;kN,2co99^_0=Uc!h`76^k87.T$)b!H17s:)Q!33a/)[-.[<^B$9W6PP0PCp %R='nV#P]"".'VqMPNGc"9Ie:CblFBX-F,2C\NYZ'VV.L0N)+(OT;_s_.f`OIGlXrH0On%cRe,;$7qRD%);:*rta"!CjDmU %2oflk>U%@&F6X0Q$q)qmk;0PL!X%HW6<\fWE[_2V8Z*^di4r8Oogq=L&;3ibfo5WmSIU`B*)d;cl!0suA^T#i10I-jj&(gAT4]>H %Bu%!$g?jar=`Y1<3WWWQ"l9I@nuMhg8u4bE8/3ltf15',i"s1:OH0,>Qd0'3!G4r8^^&G/h4A0YSN'"=',.ifk$W7E^q-M(lQ^JX %Mdo0i!%TE:DmS+d"?eo%%CV.+nUP>9;nb$AHpDr%QGDd[V=AqusgAIE3cCWN8-2.?9*]b6(K?6J(e( %!(N3..gHJ7_e7C'eMLfLgP6ch!o[Yp[P2_m%MgUj*'c/W0\8F[81'PY;Jb&6#P;XXJ5qJ>iP;fR(S=gD"!e-;QH_cg$%CsGC`<"T %49NL`F*Y,2^lmjg\[;5irS+1W4I&tK]aOWeP6Nck`IZWk>I._[^tL]=nN"B34AZT'aD)SLFnuc%1S>Ub$h."GX%EX]"fOAg4QG?L %B)s"RE$,Vk9G[OsV[tYi=b*RPTHD5U+dRn2W]_4)Z="\<\Y_U5!9M%o/RXPCHJeZ$I)N? 
%AI9_PrivateDataEnd ViennaCL-1.5.1-src/doc/manual/figures/logo_px200.eps000644 001750 001750 00000164241 12267307531 022066 0ustar00rupprupp000000 000000 %!PS-Adobe-3.0 EPSF-3.0 %%Creator: GIMP PostScript file plugin V 1.17 by Peter Kirchgessner %%Title: logo_px200.eps %%CreationDate: Fri Dec 4 11:45:55 2009 %%DocumentData: Clean7Bit %%LanguageLevel: 2 %%Pages: 1 %%BoundingBox: 14 14 63 63 %%EndComments %%BeginProlog % Use own dictionary to avoid conflicts 10 dict begin %%EndProlog %%Page: 1 1 % Translate for offset 14.173228346456694 14.173228346456694 translate % Translate to begin of first scanline 0 48.008225409286787 translate 48.008225409286787 -48.008225409286787 scale % Image geometry 200 200 8 % Transformation matrix [ 200 0 0 200 0 0 ] % Strings to hold RGB-samples per scanline /rstr 200 string def /gstr 200 string def /bstr 200 string def {currentfile /ASCII85Decode filter /RunLengthDecode filter rstr readstring pop} {currentfile /ASCII85Decode filter /RunLengthDecode filter gstr readstring pop} {currentfile /ASCII85Decode filter /RunLengthDecode filter bstr readstring pop} true 3 %%BeginData: 58520 ASCII Bytes colorimage JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> bl7bDs8MusrVlcq!ri6!r;Qits8Mlp!<2rs!ri6!r;Qits8Mus"9/?#rr)iurr;uqrrW3"rr)ls rW3&urVm!!s8N#rrrW3"rltIn~> bl7bDs8MusrVlcq!ri6!r;Qits8Mlp!<2rs!ri6!r;Qits8Mus"9/?#rr)iurr;uqrrW3"rr)ls rW3&urVm!!s8N#rrrW3"rltIn~> bl7bDs8MusrVlcq!ri6!r;Qits8Mlp!<2rs!ri6!r;Qits8Mus"9/?#rr)iurr;uqrrW3"rr)ls rW3&urVm!!s8N#rrrW3"rltIn~> bl7as]`%g/"9.'5rr)iurjUterr`8`Xo84u"9-0brr)iurl4X/rrVnoq>L 
bl7as]`%g/"9.'5rr)iurjUterr`8`Xo84u"9-0brr)iurl4X/rrVnoq>L bl7as]`%g/"9.'5rr)iurjUterr`8`Xo84u"9-0brr)iurl4X/rrVnoq>L bl7a/7Jcr^!hZFhr;QgZ[/U%("25cOrr)iu=b$>HrrQ`jrr)iukS bl7a/7Jcr^!hZFhr;QgZ[/U%("25cOrr)iu=b$>HrrQ`jrr)iukS bl7a/7Jcr^!hZFhr;QgY[/U%("25cOrr)iu=b$>HrrQ`jrr)iukS bl7a:=n_go"9,7&q>Lf#rVm!!>cdc"rr`4+pAN;:J,~> bl7a:=SD^n"9,7%q>Lf#rVm!!>HIZ!rr`4+pAN;:J,~> bl7a9=8)Um"9,7%q>L-.Purr`4*pAN;:J,~> c2RnFT3M'grr`1uZ24G!"8c!.rr2p#rPU\irr2p"rG@PirVlusG2iK"rr`8a1@>&?"8n:0q#13p r.i&arr3*"q(g(jrr`5-=nhmp"8lK.qYgErqdK6ucMrC~> c2RnFSm(merr`1tYkn=u"8bp+rr2p#rPLPfrr2p"rG.AfrVlusFQ38urr`8a0^\i="8n7-q#13p r._r_rr3*"q(Tngrr`5,=SMdo"8lE,qYgErqd9*scMrC~> c2RnFSlkacrr`1sYPS4t"8bj(rr2p#rPCDcrr2p"rG%8drVlusF5m/trr`8`0(&W;"8n4+q#13p r.Mf^rr3*"q(Bberr`5, c2RnFT3M$frr`1uYke7t"8c!,rVlg"rPU\grr2p"rG@MgrVlusG2`Aurr`8a1@+o="8n:/pAP!n r.i#`rr3*"q(g"hrr`5-=nVan"8lK-q>L c2RnFSm(jdrr`1tYPJ.s"8bp*rVlg"rPLPdrr2p"rG.>drVlusFQ*/srr`8a0^J];"8n7-pAP!n r._o^rr3*"q(Tkfrr`5,=7uOl"8lE+q>L c2RnFSlkacrr`1sYPS4t"8bj(rVlg"rPCDbrr2p"rG%8drVlusF5d)srr`8`0'rQ:"8n4+p\k*o r.Mc\rr3*"q(B_drr`5,L bl7a9=oA6u"9,7)rVc`urD('*rr3*"`&.Djrr`78NW&hU"9!<'rr2p"rol'*rVluuO*pm(rr`7S _>aE5!rFUorVm!![9*;)rr`7)Yl4P#"8uR(rm(Oo~> bl7a9=oA6u"9,7)rVc`urD($)rr3*"`&.Djrr`78NW&hU"9!<'rr2p"rol$)rVluuO*pm(rr`7S _>aE5!rFRmrVm!![9!5(rr`7)Yl4P#"8uR(rm(Oo~> bl7a9=T&-t"9,4(rVc`urCss(rr3*"___5hrr`77N;`_T"9!<'rr2p"rol!(rVluuO*gg'rr`7R _>aE5!rFOlrVm!!Zr[,'rr`7(YPnG""8uO'rm(Oo~> bl7^;>l4O%WjVa(!E$Z1rrL+tqu6\5OSf%YG3]#*!p_Q4r;Qe\J,B6HLW',j!rXh!r;Qf,=o84" ?*jG.!+=%NJ,~> bl7^:>5S=#WO;X'!DgK.rrL(qqu6\4O8JqXFQr`'!p_K2r;QeZIJa$FL;a#i!rXasr;Qf+=Sr+! 
>dF8,!+3tMJ,~> bl7^9=o84"WO2R&!D^E-rrL(pqu6\3Nr/hWFQr`'!p_H1r;QeZIJa$FL;Wrh!rX^rr;Qf+=8W!u >I+/+!+*nLJ,~> c2RnFWGMD/rr`5)\bl@*"8cTFrr2p#rQ7h0rr2p"rHjt0rVlutK]`74rr`8c8+?KW"8n[Hq>L c2RnFWGD>.rr`5)\bl@*"8cQErr2p#rQ._.rr2p"rHjt0rVlutKBE.3rr`8c7e$BV"8nXGq>L c2RnFVJ#c(rr`5'\,?4)"8lH?rr2p#rl@P)rr2p"rcXV)rVluuJ*-b0rr`8c5OeXO"9"UAqYgEr rK"qrrVlrq5L06-"9#fAqYgErrFr\;rVlutC\n%Us*t~> c2Rn9Q[-OJrr^f@T\9>E"SofSV"FZd"QZIp_"[j."6"ckeG]CL`It\grr3)b[qqPJ,~> c2Rn8P&nG;rr^`9S(@T="SoZIT_/6`"QH.c]_DF*"5eH`dJa(I_LJlZrr3)aZXCHErr^H&K>mfh "3u[rir8r[oX\G-rVm#PS8u:?rr3)LFILeZrr]rW]^sGKJ,~> c2Rn6Kk.Xjrr^K%O3mn+"Sf90POe_Q"Q#JDZ1e1s"57R>b5M>B\T+P5rr3)^W)0i!rr^/fFhI\Q "35\Qh>[EVo<;&[rVm#JO(5Hlrr3)FAW>%7rr]T?Z16->J,~> c2Rn:Q?L=Irr^lBT\KJG"T#uSV=jif"QcRm_ta62"6+cjf)>UNaF^tlrr3)c\REDSrr^W/L c2Rn9OD_u7rr^f:S([f@"SocGT_/6`"QZ:^^\@a-"5n?Ye,B:K`-\o\rr3)b[9:9Crr^N&JB7]h "42^qj8T&\ot=M+rVm#RRr,qWrr^&V^\#hOJ,~> c2Rn7JQobbrr^T&NRRn,"SoK+POneR"Q5S9[eB_#"5II4c2IYE]P=J6rr3)_X$a>orr^8dDnu>O "3P\Nhu`@,0rr]`:[dq`DJ,~> c2Rn?S8I-]rr_JZWp9T`"T7"dZ2+D!"Rrfug\_!M"7CGskPbD_hg`oErr3)lc;<-krr_>FL#Dq0 "6GHJnG`Fiq:\ZWrVm#cX)-Phrr3)aDQ)gurr_%rg\K.pJ,~> c2Rn?QY,%Orr_GUVWn*["T6t[Xnhtr"RiWigACmL"7C2fk5G;^hKd<0?n,E=hq:S9JrVm#bVe"N[rr3)`B;Fbirr_"igA&tnJ,~> c2Rn>McAC.rr_>GSEKnO"T-bCV"jrh"R`6If)#CG"70HFj8Ju[g1eP!rr3)ja"c!?rr_2/E8:El "6"I&mJd+fpsh+'rVm#`SPn&;rr3)^ cMqADgmV[6g%bU+ihEL5j8\i-k?,(GoDeI?\3oH:s8V0 cMqACgQYh%fD,C'iLHe'iW&W)k"`,6o)J@=[5d=*s8V*8B85F"s8CR2Dq*P-s6SOk1>CFYs6AX" G3%fcs4H=[ZejdCn(QhX][uiVn_g cMqABeqccTe+`mshN=>ahZ*5tj>s0hnc/16X cMqA;]QI&M_Y!lRY^1i*dJraPVb2ZWm/QL_M*.]Us8UQU=_t=_s89a5?C\Xds5CMY3MXIgs5'kp A#n;_s2;'QLpkn7k.[]QNPb2bl*>k9Vp,2@c`p>rZI/^B_P^>1^$F#EJ,~> cMqA:\SXd2^[qKLXE&Qgcio]'@WAJ,~> cMqA8Z<]2U\FT[>V.=A>b5^n cMqADYr5'&gA(^,@5K`Tj8\hq+\=?!oDeH@/l*Y3s8V/3698?,s8CY,5UNQ!s6Z:%:c$i(s6E<= 5!%p-s4^I7/rJfkn@To@/DBgtnn^$N+kZhpiaYG5C#f6fg*B*1\b"/IJ,~> cMqADXXlRXg%bU*=Xke/irA_n'gEtRoDeH;,=N?es8V/-3&":ds8CUu2B&L[s6Q$i7k!'hs6;m' 1b\%ls4U(#,D>1Yn@9<)+kcShnRX(2("`KciEJDl@H.=\fbljd[dqcEJ,~> 
cMqABVALZkebB+"9,/+@huEAd!\+eeo)J<.%kn($s8V%t,9'&'s8CI`+U=\*s6>RK0c&=;s6)3W +!0MCs40:Q%sm:9m^!!M%Ft.On5p>P!n72LhG#@2<8[`LeHR?%YOTs=J,~> cMqAD[R*VQgA(^,Da?X5j8\hs2bZHNoDeHJ5#4,\s8V/>8OR=Ls8CY@834$Cs6ZO@:e'1;s6EoX 7nr4Is4_$P5*P./nA$tZ4kfW0np*Jn2qe62ic.UVG2rVsg,_hb^%9SMJ,~> cMqADZT1?3g%bU*BK.njirA_p/OMD.oDeHE1e'(=s8V/85]D](42IJ,~> cMqABWZ`l7ebB+"d`0dYBJs6)Zl -8" cMqACZ9UoEf(]4$D*L1+i;`Jk2+]pBo)J cMqABY;JL$ebB+"AMc2\huEAh.R,\uo)J9ceL`p6[dh]DJ,~> cMqAAW$3W9d.[Lm=Wnnsh#I#^)CZc6nGi'3+"S>Gs8Uo$-R;X?s8:4n-6/cDs6,Ub/1T$Es5lZo ,r4K[s3aOc++`>Lm'@T]*mjQZmURFh)U\Tbg05sK@,(_RclFMQYOKm cMqA6_jOQp[IO:7hP..,a8bP=oLl[/kPsho]e]6!s8U6jAr5HWs804lDr8Ofs4b]T/EF58s4?Cj GjNfWs0q$=]?nh&iP4q7`O5+5jP$b9oZ$q2`84UrgrKH_[I[Vp^us&DJ,~> cMqA5^lh@ZZ17k0gmk:q`;f25o09Onk5X\j\0^a`s8U-d>_1b?s80(gB&1>Vs4PKN*oaI$s4-1^ DsGXJs0L[/\'E.rhn8J#_6NA,j4L>%o>CV._:q\\fu3pXZ1:`_^#dT?J,~> cMqA2\:lj.W:0bsfT;ZS^AmK%ngUQFj8\8^YRC$6s8TmX8TNRgs8&\[<8"n5s4"m>"Q-0Rs3T_H ?KfK1s/OphYK4Z^gpHMQ\u=ooi7+ATo">#&]%B!2e\2"KW:<"@[GoO4J,~> cMqAQoUB[Qr;Q`ok+Sd`Ss8W&aGECqas8W#RIa.K_s8DK5:=f@Cs8Cu? 
K])h/s8:r8ZgRelrq>&@]);R-rog@sg\_$Nr9%ppkktJ[r7l5FpAWD cMqAQoTj(Dqu6Wnk+/@3r;Zckg0LiSrr;ubYtEaFs8W#`EJilUs8VuPGK]UVs8DH27Fh;8s8Cr8 Ic(,(s8:o1YjMDhrq=u5\,61)rog4hgACpMr8qUdkPYAZqqGf cMqAQo86H!qu6Wnj-#nnr;Zckek)Y0rr;u`W&3*"s8W#^@"%&3s8VuMB>sH?s8DB*0@9Urs8Ci( DVP*hs81_sVsF9\rUnYnYP\>!roKYHf)#FHr8U_Djo#/Xqq"m"o`!2:J,~> c2Rn;Gs0IOrr_).Mr=U7"T$:uPkP+V"R2?qbPD/;"6N9mg\q-Scr@RHrr3)f\eiBbrr^kh>giTN "5.1Ol2L\bpW45FrVm#YN'#5]rr3)W5De:rrr^J2bP'6]J,~> c2Rn;FumhFrr_#)Lu/.2"T$1oOnSeS"R)0jaS>c7"6E*fg&:pQbu(n=rr3)e[hZgZrr^bb=jR'H "4pqGkPkJ`p;Ri=rVm#WM)`TSrr3)U4GM\jrr^A-aS!jYJ,~> c2Rn9D`#E2rr^hrJ_C&&"Soh_MY-oJ"QYUX_"dp/"5uRTeG]CL`_*E&rr3)cY7A5Drr^SV;T\n; "4:83jSo/]otCp&rVm#SJhk.>rr3)P21OqPJ,~> eGfmTbi7m;mJcmpU$)CQ"7F%Hn,E7gqrO?kq>UL9?s*t~> eGfmTbi7m;mJcmoU#l7O"7U]la"b*rqurpj,_Rcr<2Q.T]H%Mr<)G\`:!U&"6?6L9?s*t~> eGfmTbi7m;mJcjlT]5qK"7!VX_X%1!"5op3q#10>s*t~> ec,pI^u/`hGaPnjrmq%urmq"trmq"trmq"trmq%urmq"trmq%urRUqtrmq"trmq"trmq%urRUqt rmq)!!p]LVe,Op~> ec,pI^u/`hGaPnjrmq%urmq"trmq"trmq"trmq%urmq"trmq%urRUqtrmq"trmq"trmq%urRUqt rmq)!!p]LVe,Op~> ec,pI^u/`hGaPnjrmq%urmq"trmq"trmq"trmq%urmq"trmq%urRUqtrmq"trmq"trmq%urRUqt rmq)!!p]LVe,Op~> f)H'TcI$VL7[C f)H'TcI$VL7[C f)H'TcI$VL7[C fDc3Sik)2j!*P/b`3u8H#/A@*jRD[;f`-I~> fDc3Sik)2j!*P/b`3u8H#/A@*jRD[;f`-I~> fDc3Sik)2j!*P/b`3u8H#/A@*jRD[;f`-I~> f`)'Ljg))g!!<7!HuYa!#BFa!Us7-Rr7V+#~> f`)'Ljg))g!!<7!HuYa!#BFa!Us7-Rr7V+#~> f`)'Ljg))g!!<7!HuYa!#BFa!Us7-Rr7V+#~> g&D0?d_1nD!!E?\I&XWHZNme@Z-LS;hu)4.J,~> g&D0?d_1nD!!E?\I&XWHZNme@Z-LS;hu)4.J,~> g&D0?d_1nD!!E?\I&XWHZNme@Z-LS;hu)4.J,~> gA_9Pbe^L]!!NB:LTeSHS'_GdbfnDXnbq5BJ,~> gA_9Pbe^L]!!NB:LTeSHS'_GdbfnDXnbq5BJ,~> gA_9Pbe^L]!!NB:LTeSHS'_GdbfnDXnbq5BJ,~> gA_64erT_/"9iSCa3p gA_64erT_/"9iSCa3p gA_64erT_/"9iSCa3p g]%Au_J0,M!!EOYX3K"WbQQ\`c-,/&s*t~> g]%Au_J0,M!!EOYX3K"WbQQ\`c-,/&s*t~> g]%Au_J0,M!!EOYX3K"WbQQ\`c-,/&s*t~> h#@K$e#OGe!!WH0I\OX'c'HQWbN86Cd,a$_J,~> h#@K$e#OGe!!WH0I\OX'c'HQWbN86Cd,a$_J,~> h#@K$e#OGe!!WH0I\OX'c'HQWbN86Cd,a$_J,~> h>[STj(SVo!!FL@YKbFYbQZ)Mrn#6Gh>`!~> h>[STj(SVo!!FL@YKbFYbQZ)Mrn#6Gh>`!~> 
h>[STj(SVo!!FL@YKbFYbQZ)Mrn#6Gh>`!~> hZ!]&j(o\6!!WH1KWWJDc'6EVaQNQTg$.eYs*t~> hZ!]&j(o\6!!WH1KWWJDc'6EVaQNQTg$.eYs*t~> hZ!]&j(o\6!!WH1KWWJDc'6EVaQNQTg$.eYs*t~> hZ!Z)BcI$O"p,QQYf>&7PKX6PkPj)\f\bQcJ,~> hZ!Z)BcI$O"p,QQYf>&7PKX6PkPj)\f\bQcJ,~> hZ!Z)BcI$O"p,QQYf>&7PKX6PkPj)\f\bQcJ,~> hZ!rA"#:hr!?=t:^s\@h#0?!,f#YtihZ&*~> hZ!rA"#:hr!?=t:^s\@h#0?!,f#YtihZ&*~> hZ!rA"#:hr!?=t:^s\@h#0?!,f#YtihZ&*~> hZ!o17]KGh4+a]Vd#cHUaQNQQaNWT!s*t~> hZ!o17]KGh4+a]Vd#cHUaQNQQaNWT!s*t~> hZ!o17]KGh4+a]Vd#cHUaQNQQaNWT!s*t~> hZ!l7IDU"WH*P05O3A*TkPj#Qbi&O#kOS69lgs9=J,~> hZ!l7IDU"WH*P05O3A*TkPj#Qbi&O#jmhp4lLO*;J,~> hZ!l7IDU"WH*P05O3A*TkPj#Qbi&O#ipcL.kj[a7J,~> k5PVWp%\=Uo`"jpQI;[aWl3AHbSJ:_rmf*7htZo\\\5Peh"gU7rr<#trpKf:~> k5PVWp%\=ToD\aoQI;[aWl3AHbSJ:_rmf*7htZlX[^roYgA1@3rr<#trpKf:~> k5PVVo_8+Po)AXnQI;[aWl3AHbRqqZrmf*7htQcNYHk@ k5Pq0akGIm_#OH5a4$8r_:O[j"j#m+e]?2 k5Pq-a4].f]`8$1a4$8r_:O[j)T_+@e]?1mf\5pH/JY#Bh7gMurr)`krVulro)F4~> k5Pq&_:@)R[K$:*a4$8r_:O[j)T_+@e]?1ke(4(.'*=Zafs\3arr)`krVulro)F4~> o)BX-kLA&5e_8g5UqXglGWJ31c,HjO`RC,s]$cS^)9D"?e]>pq>Am_98juZiEHlhM?X@,J?9Z'm s*t~> o)AdjkLBL]'\_?sT=Vq^EA0Cdb/::F`RC,s]$cS^'$088e]>pl:2!He5s%VBC2nE9ra#\6>s>sl s*t~> o)AdjkLBL]'\_X#jk s*t~> nc'JS-8RP;1,hmJ>uN[J:JX>8;ZsnTe!>3hdZ)HhaQNQQaO8hth"*Kd,cBh_`529Kr:TgXrVc`d s*t~> nc'JS-8ID70f;U?<(\Vu7Rf?c8d)rKe!>3hdZ)HhaQNQQaO8eqg[ZpM)5H?M_7f[Dr:TgXrVc`d s*t~> nc'JS-8@820Jc.!5;Wrn0JFX\1^(V5e!>3hdZ)HhaQNQQaO8_jfC9jt#+OZ,\?l&2r:]mYrVc`d s*t~> o)BX,rr2imqu6WmVU!oaIOI!mnCl.Y`RC,s]$cS^%`mi4e]>tms89:Ug=jEsk5Tr~> o)BX,rr2imqu6WmTZc$VG9&,In(GkS`RC,s]$cS^%`mi4e]>tms80(Og"F-lk5Tr~> o)BX,rr2imqu6WnP/Yutms8/bAe^_1Yk5Tr~> n,NCds8N`2lh'qs\?`KNbP1Z*`RC,s]$cS^%`mi4e]>tms8MH[p\44Ok5Tr~> n,NCds8N`2lLX_m[B?dAaS,6%`RC,s]$cS^%`mi4e]>tms8MEYp@e%Mk5Tr~> n,NCds8N`2k485bY,A5#_Y3Nr`RC,s]$cS^%`mi4e]>tms8M?Vp@dtJk5Tr~> k5PqUlgaN3kPtS\`RC,s]$cS^#0?!,e]>tmhZ&*~> k5PqTlLFE1k5YJ[`RC,s]$cS^#0?!,e]>tmhZ&*~> k5PqQkjS!)j8]/X`RC,s]$cS^#0?!,e]>tmhZ&*~> hZ!fY`RC,s]$cS^%`mi4e]>tms89pqj5\PHk5Tr~> hZ!fY`RC,s]$cS^%`mi4e]>tms89gmio8;Ck5Tr~> 
hZ!fY`RC,s]$cS^%`mi4e]>tms89RbhquQ3k5Tr~> k5PqGhWX1\f`2!M`RC,s]$cS^)T_+@e]?+nl0-oDG%n8Ale:"is8Muqrr;uso)F4~> k5PqEh<=(Zf)PdK`RC,s]$cS^)T_+@e]?+mkNCN7E+,m,lIXSbs8Muqrr;uso)F4~> k5Pq@g#_>LdJs7F`RC,s]$cS^)T_+@e]?+ljQ4p"? o)JahrVRB/s8:dAo$jh1SZ:-.mJ4`MQ?TpgNQ`'WkPj#QcaTICW]i['I]9?>T"20*Yl^]Xrpp)>~> o)JahrVRB/s8:^>n^4A$R&84"m.nWLQ?TpgNQ`'WkPj#Qca9"5V)(%XGGM+(R^oa'YQM/*hu<3H J,~> o)JahrVRB/s8CR7nB@V`NLn;^l1i6HQ?TpgNQ`'WkPj#Qd'/FrRNW^cBptNMNjl:lYQCTWrpp)>~> o)AdjiQ;)B'[Y:VS@-#LE%s@ca1S).`RC,s]$cS^)9D"?e]>pp=D_/191DcgDg-JE>ZtE==us=d s*t~> o)BX-iQ96cbL"YbQEe!;BI5B>_muB%`RC,s]$cS^)9D"?e]>pj9P.!^5s.V>BQ&!0>?Y68=us=d s*t~> o)BX-iQ00bbKnS`Kr;2f>7hZK]Wmmd`RC,s]$cS^)9D"?e]?!g3E%mc.OQu@$5$4=ZX4c s*t~> o)BX-WH!30H$F[lKnP##<^K8SFb*-4`RC,s]$cS^(Wbe=e]?+nki^f?EFZ95lIjbcrr)Zhq>UBa s*t~> o)AdjWH%l]'7,I`I=#g)3'9cKHUuQ'e!>3hdZ)HeaQNQQaO&`)k1aN`>HHfJf$sTrrV? o)AdjWGqf['7#7HD/3hdZ)HeaQNQQaO&]%j4IR<9r6_0d*;IerV? o)J^g(]F:0rr;o+f])%R9icr0in*kBe!>3hdZ)H\aQNQQaNWTBr5@LPim6TGs*t~> o)J^g(]F:0rr;o'f&>\F6qr-liRRS>e!>3hdZ)H\aQNQQaNWTBr5%7KiQ^9Bs*t~> o)J^g(]F:0rr;qqdGEl31-'F8hTtf4e!>3hdZ)H\aQNQQaNWTBr4:V=hT+F6s*t~> k5Pq0akGIm_#OH5`RC,s]$cS^#0?!,e]>tmhZ&*~> k5Pq-a4].f]`8$1`RC,s]$cS^#0?!,e]>tmhZ&*~> k5Pq&_:@)R[K$:*`RC,s]$cS^#0?!,e]>tmhZ&*~> k5YJ[s8N&ss8N<%`RC,s]$cS^#0?!,e]>tmhZ&*~> k5YJ[s8EB(r;Zfq`RC,s]$cS^#0?!,e]>tmhZ&*~> k5PGZrV['#s8W&:e!>3hdZ)HTaQNQQaNWT!s*t~> k5PGZrV['#s8W&:e!>3hdZ)H_aQNQQaNWT@qo74?fZ`=Qr;>gZJ,~> k5YJZrr!0$s8W&:e!>3hdZ)H_aQNQQaNWT@qnpt:f?3"Lr;>gZJ,~> k5YJZs8Duqs8N<%`RC,s]$cS^&]j/7e]>tmrVF1SeC2@krVZQWs*t~> k5Pq0akGIm_#OH5`RC,s]$cS^"j#m+e]?2 k5Pq-a4].f]`8$1`RC,s]$cS^)T_+@e]?1mf\5pH/JY#Bh7gMurr)`krVulro)F4~> k5Pq&_:@)R[K$:*`RC,s]$cS^)T_+@e]?1ke(4(.'*=Zafs\3arr)`krVulro)F4~> o)Jah!;ZWh'`J%,\_,p(ND*<)kMb%c`RC,s]$cS^)9D"?e]>mmme] s*t~> o)Jah!;ZWh'`J%,[+=3tLI+[_jl+b^`RC,s]$cS^)9D"?e]>ph87P1S69R_;ASlI&='&L+=#R\\ s*t~> o)Jah!;ZWh'`J%-W6jD\I5*[%in_rP`RC,s]$cS^)9D"?e]>sd2,H(X.k!&;;d*[B<`W:'<]7S[ s*t~> o)AgkVegMTFq]1VKnFnu<^K8SFFZp0`RC,s]$cS^)T_+@e]?(ITW5.X5V[hkXeDE"[^< 
o)BX-VegHtF`r"`IXHHX9K5.+CO/Lu`RC,s]$cS^)T_+@e]?(FR\m&?2CEdJW19Bh[C!0DYbp@X o)F4~> o)BX-VegErFEMeYCM[`i3?oh->&$IP`RC,s]$cS^)T_+@e]?+CNgu^a*><7[T8nuC[Bm*BYbp@X o)F4~> o)BX-YC(tUK7\f>LP^h:=[5ATIu%%Q`RC,s]$cS^(Wbe=e]?+nl0-iEH#0eGm+g:krr)Zhq>UBa s*t~> o)AdjYC-;#'82KuK7\;D2EOTVJl:+;e!>3hdZ)HeaQNQQaO&`+kh'iqAZjtWg"6-#rV? o)AdjYC$5!'8)9^F*)(Y+=&s[FALB)e!>3hdZ)HeaQNQQaO&]'jj[gM=Jt!>eC"3nrVHBdrr2Kg J,~> o)Adjs8W)ts8N`2hsL!`Y*tr_dIQ]#`RC,s]$gDu!mAa.nAkUOrla$>s2tGcamR4Ma7mRVaN;TJ bPTKnaQNQQaNWTBroES>o',],s*t~> o)Adjs8W)ts8N`2hs2tGcamR4Ma7mRVaN;TJ bPTKnaQNQQaNWTBro o)Adjs8W)ts8N`2f]qtMU5P&(ameQi`RC,s]$gDu!mAa.nAkUOrla$>s2tGcamR4Ma7mRVaN;TJ bPTKnaQNQQaNWTBro!56nE03%s*t~> k5PqGhs'C`g&M*N`RC,s]$gH!#Kk-,^q$n][JdQ4\,WlA[C3WZ`5g$Dgrp/Lb/hK5\@&]O[C3O@ \,Nf=[C!9FYkbI,Z*q<^`lcL&bQc/Ormf*6i84Ka~> k5PqEh<=([fDkmL`RC,s]$gH!#Kk-,^q$n][JdQ4\,WlA[C3WZ`5g$Dgrp/Lb/hK5\@&]O[C3O@ \,Nf=[C!9FYkbI,Z*q<^`lcL&bQc/Ormf*6i84Ka~> k5PqAg#hGOdf9@G`RC,s]$gH!#Kk-,^q$n][JdQ4\,WlA[C3WZ`5g$Dgrp/Lb/hK5\@&]O[C3O@ \,Nf=[C!9FYkbI,Z*q<^`lcL&bQc/Ormf*6i84Ka~> hZ!fY`RC,s]$gK"#g:9([]#OSNK!jrr/Uc9#*>.F[(s](grp/K`kSgOPDtEWN/`hnNrP(AN/<:B KDC'&Ko_e+\Al\hbRV_Wrmf*6i;`fDp%J+Kp?)C*~> hZ!fY`RC,s]$gK"#g:9([]#OSNK!jrr/Uc9#*>.F[(s](grp/K`kSgOPDtEWN/`hnNrP(AN/<:B KDC'&Ko_e+\Al\hbRV_Wrmf*6i;`fCp%J+Jp#c:)~> hZ!fY`RC,s]$gK"#g:9([]#OSNK!jrr/Uc9#*>.F[(s](grp/K`kSgOPDtEWN/`hnNrP(AN/<:B KDC'&Ko_e+\Al\hbRV_Wrmf*6i;`fAo_%nFo]H1(~> hZ!fY`RC,s]$gN#$-UK.ZCZ_`?s6_4>5VV)=pJ2:BR?;W_9(NW%*Qf/YEa3?>?4g->$Cc,s&oS- <)?=a:&@Ti9i,,8Mke&-bPfWpaQNQQaNWTBr3b,0g;;P*s*t~> hZ!fY`RC,s]$gN#$-UK.ZCZ_`?s6_4>5VV)=pJ2:BR?;W_9(NW%*Qf/YEa3?>?4g->$Cc,s&oS- <)?=a:&@Ti9i,,8Mke&-bPfWpaQNQQaNWTBqm"])ftZ/$s*t~> hZ!fY`RC,s]$gN#$-UK.ZCZ_`?s6_4>5VV)=pJ2:BR?;W_9(NW%*Qf/YEa3?>?4g->$Cc,s&oS- <)?=a:&@Ti9i,,8Mke&-bPfWpaQNQQaNWTBql%lle[X)js*t~> nGiIcs8N`1lh'nr\?N nGiIcs8N`1l1=Vl[&pR=aS,6%`RC,s]$gN##g:<"RX/:=:eU[U$q<`W7R]^478Is$_91TX$-L;q P&FE!:J=7T8,PjU70,bFr^[#Zr(%&_7S6p4ZGarbbS/(\rmf*:_QLAa0Jal-]Xk#~> nGiIcs8N`2k4/,`Xehqs_Y*Hq`RC,s]$gN#$-UE#RXSpV=]8,m:'scl9heA`AVR;&b2(8Ib/^lP 
F^\NXLgS"%dLbfe2R`9R?ms*t~> o)AdirqHEc'`J%+W6a2dIOI!mnCl.Y`RC,s]$gN#*6ZF4P]pr&ObeF6I!g9eH$OXXE,&T<&-.X4 c->+h$d-JoN,X9-P`^??Hi89jGmS\jY1eoSq :f'/9:gms+0.eq70*J o)AdirqHEc'`J%+U+h)p61*N-pqgWMPQ3Mi*CMMM[1GLkg\=Ll%FfSGnubS=Q:DQ\/gB@BRWJbSeLbrmf*3F!O0q 6UsWu1.PJT7P68H0J3L.nc++~> o)BX,rqH-\p\k*ePJu)=C^:SZma]5E`RC,s]$gN#$-UE!P`Mupd(la^X8f4"W=,`*W2?;dS;+Dp c->+h$HgAnN0V?0eBY.rrMfpuWiH#ss/H.$^ruZ#$-plCbJCHMP-i7,bSeLbrmf*5D%83%/28jn )DOTU5:nE=/hI4+nc++~> nc'JR,r.;60JuLE>Z*FF:f'J7;?O_Re!>3hdeq@,U+?P*[)ga3^ZM/0F7_Q`6HR*bSnRcrmf*~> nc'JR,r%/10Jc=:;b8Dq7n5Nc8HZcIe!>3hdeqK7S;fB3eM8:ej_W92/8e@VK[p=ZIm=`Qc[+bSnRcrmf*~> nc'JR,VLi*0/>mr4YmWj0ejd[1BPA2e!>3hdeq~> o)Aail2B?NgtpnY]Yr3>/i[Un`2r7Ce!>3hdeh6-b/M&_@n/:.%OsY,5!1e].iSd!))+it[b&pO h96\X`4:Us3@c'g00DE]3]JrJ(C^]X#6kD1"UtYA#n%.M#QY3;d*U(_rlc2"kPj#Qc/8?j^V%@p ^>/SXp&>!jrVlcas*t~> o)BX-l.atHg>(N@QaaiWCa:WBcbc^I`RC,s]$gK")p6.5Tm'm?;*@lhAS#@Z>uaHP-o+(mDn4KI c/$Snb/CiRAQ;>h86KG5A78kO=%,/+/L`%o-7'rb-R^Q$2EO&\.O8fVd*Bo8bSS@`rmf*7htcuY \\5PcgA1F6rr;urrr) o)BX-l.atHg>(N@Lp+G3?k*oNah+A4`RC,s]$gK"$-L6#Yb7,8Oe&KNZ4=.CY,\:PCgggrKtuOdBl8-FMjU'$HsVTTd*Bo8bS/(\rmf*7htZlO ZaR3HebSh-rr;us!<)BdJ,~> o)BX-s8W)srVliqg["@ZX-/dHe*u]!`RC,s]$gK")p?CF];M-YLL;>K+sA$I'b(?Z:mn/d\^8sU c/$SYbK@l4SWo%Y3=m/R+<;@7%Km4f/kPt5s%*Yb,TR-j!<<*=En/Xbc2PosaQNQQaNWTBro*;7 nE06&s*t~> o)BX-s8W)srVliqf]qtTVN$_5dI?Dq`RC,s]$gK")p?CF^949"P]f_o:/4JS6p*7[@A0^0][>?Y c/$SZbK@l8VOj-3>!>PR9hIrD4YAER6X*do=qFh?>#IC?-mpPr+u"Aoe'H; o)BX-s8W)srVlire*$/ET7VccbjOZh`RC,s]$gK"$HpT5_n k5PqapA+OXo`+si`RC,s]$gDu)9pLSdE8k^2@0d!#6Ou,!&2X`lfR$Tda-1mbSJILdF$+FAJPCV #mCD3!!#O7e(`R,f)F20eBu=Q[[9Ma!&0/\e^2S?bQc/Ormf*6i84Ka~> k5Pq`p%\=Uo`+si`RC,s]$gDu)9pLSdE8k^1e&fP>TNiWY5)e1Fd*;R6#0?!,e]>tmhZ&*~> k5Pq_o_A4So)Jag`RC,s]$gDu#L1TAdE8k_P5_!^S=5^uDe.;_lfR$Tda-1mbSACKdF$+GQB7T- S=>q1Kj&fRg"tN(%G&eG_Q\i,9jF(=e^2S?bQc/Ormf*6i84Ka~> hZ!fY`RC,s]$gDu$dHuCbeTfo0b"Qt"T8<+2<*f[h:9p!c.^ALc-FY[^n=gM#7:Y9r;[!ulKmfu iqq[ChqQ]$C]FH*S(cE>c2PosaQNQQaNWTBrQXBbk18YVs*t~> 
hZ!fY`RC,s]$gDu(sU@PbeTfo:d.$&2D[$;,"1GRjPAA6cct1f(!Y%Mc+oub69.+e1c$d7).)Up lK7+'i=+Vue])nU(/gdYe^2S?bRV_Wrmf*6i;`buioT:OjQ?Jm~> hZ!fY`RC,s]$gDu(sU@PbeTfoN1QW2S!T7fAROQ]jPAA6cct1f(!Y%Mc+p!+OIVl2R?rq^9R#dg lK7+'i=+Vue]=(Y6#!7Ne^2S?bRV_Wrmf*6i;`_nhrEbCiTC/j~> k5PqQkjS!)jT#8Y`RC,s]$gAts31hj[Z"NG((giTr;['\io8eFd*Bngbl>omaL7+%+:nter;[$1 ?0CD2h:hbZ%+s+W_fl(\;Pi*td*;R6)9D"?e]?(nn*\PaPDZg+n`Ag0s8Muqrr;ufs*t~> k5PqPkO7m'irB&W`RC,s]$gAts328![Z"Ne76iml1,(."6fR+]f$M^efZaB?'$.MA=[bS=3AWED -mERg==Qprlc8$kPj#Qce\$VcARa:Zek33ir/oVrVc`qrpg#=~> k5PqMjR)?rhuE`T`RC,s]$gAts328![Z"OJU7e$FQB-H%?K0u#f$M^efZaB?'$.MAG*&AgS=,\$ GY`_MkMtCPqq2D1e^24'9f[*pg==Qprlc8$kPj#QceRmQaFSngX4m%"h>[HRrVc`qrpg#=~> nGa@(r;HQms8D*Mp!C(EX0U%DnbU5RQ?Tpgp<3f[Xa`u"+;P=fr;[!ehV?`1c.'rB_l\GU-lWd* r;[!ADW0O-eG7K"ccW\G!!$0'gXXWprlc%skPj#QcbHNe]0&80LpsgjVT?^T_?7]ck5OrOJ,~> nGa@(r;HQms8D'KoZj\:Vln58nG:,QQ?Tpgp<4/eXa`u=9gq0&1+t'u:#=[WdEU1b&]:`$:J+2E 3\rKC-S]i(i7c]Vd0n:C[YYe3@'887ccuI5)T_+@e]?(MV6IE`1a@RXZD+&6_ns7+_RSb-o)F4~> nGa@(r;HTns8CmEo>mo#SYsHumeOiNQ?Tpgp<4/eXa`usVP9WMQB$;tA_u4odEU1b&]:`$BT/a[ S=,Y"F]jOZi7c]Vd0n:C[[ol@EO=KNccuI5)T_+@e]?+JS# o)BX-`grj"USaumOe/5=A2uk\TV-Mb`RC,s]$g;r#f3Qh75cM3%0c\1"CTs9e'HOf#g:&`H9a;6 (_$?C"X8!Ag!e7CbmVb=`k@N8!,T@&e'@p9)9D"?e]?"1HA[5>76G4:N/NXXL4k56Jo/d&s*t~> o)BX-`grj!U8FllMO9p&>;%m6R[\HU`RC,s]$g;r&](Mq78?`O4uY5M.3ERM66Q^Y7/^rQH/#kPj#Qc(8m-GW7f@?@IW^LQR:DK7nhnc11ga~> o)BX-`grftU8=cgH&IcE9,Sd=Nf\89`RC,s]$g;r&](Mq7=_>4Ssu%*H!GaNf[J0rbS&+8WHq`t W1olOQ&0TgMVHh5cMGcnbfe#>WB;\^`7i\crQH/#kPj#QcC/0aBI"iD9lG8uH&dT3JqJVkc11ga~> nc'Jt?>!tcB52^BH?Nmt77^3cFu[ace!>3hde:lj_Q82T2^o^V"oSE(@+j/[cM5W]bOWmTbP'-c bJC0(5W1,3%K-81,&u;fdEVO3$d6]7aJ1: nc&W\?N+ROAncO9F)P>S4?l8=D`>q[e!>3hde:ls_Q82T9iO\J2_m$8+)0E[da%X3r6+ZTr6+fX '$J+jH:Ub283Sga/LEB$g"4WrqTK>jb/q`(4?8JEf[A%CbSS@`rmf*:gZ.>Q=^4S!io\Y6qYpEf q=ssgo)F4~> nc'Jt?"IV[AS?0t@9uP]-7CT>?8^!He!>3hde:ls_Q82TH)[U3S!K%Z>^Y5Vda%X3r6+ZTr6+fX '$J+jH<>NoURmm=M0*$[g"4WrqTK>jb/q`4>Z$s7f[A%CbSS@`rmf*;g>CiF91V1?hrMr"qu6Ng q=ssgo)F4~> 
o)J^g(]F7.rr;koceIl3hde:lj_QJAX3[u*["oSE(@+a&YcM>ZabPTE^bOs'Z bPTH^bPB?fbJC0)5rUD9%fHA2,&u8dd*;I3!6bAa#gL;^VT%-:d*;O5%`mi4e]>tms89@XgY9[# k5Tr~> o)J^g(]F7.rr;kjbhDH0+t[X!g<9'+e!>3hde:ls_QJAX93"SK2_m$8*bj9Xda%[4!6b8^!6b&Z !6b8^s3(5^'$J+jHUpn68Nnpb/LEB$f[eEoqof&`rlPMk`i6&\f@A6mrQGbmkPj#Qbi&O!Ze!ja _:[oMJ,~> o)J^g(]F7.rr;n]`n'Ho%2E=3hde:ls_QJAXD5sD(S!K%Z>C5#Rda%[4!6b8^!6b&Z !6b8^s3(5^'$J+jHWYWqUn='>LiZgXf[eEoqof&`rlPMkb."n*g""HorQGbmkPj#Qbi&NuWm]JL ]%#mBJ,~> k5Pq0akGIm_#OH5`RC,s]$g;r#fEit7QN(A&-i(5"CTm5da%d7%*Qf8`koL(]tqY-b00V)$d6Z3 _S3[r^r+79bPKEgbJC0)5rgY@&,cJ3,&u8dd*;O5s2thm`l,j?d+-b"e^2UdrQGJekPj#Qbi%@X J,~> k5Pq-a4].f]`8$1`RC,s]$g;r&]:f(7S?cU5WCMP.3<3Uf$_kAbRD\;ai;34]tM8#a3)U$bR;V9 `koI&]thP,b00Y*'$J+jHUpn890b9f/LE?#f[eEorQP;b%*?N-`QZ]Zf@SR"cHZ=3#0?!,e]>tm hZ&*~> k5Pq&_:@)R[K$:*`RC,s]$g;r&]:f(7W#3&T:;.+H!>XJf$_kAbRD\;ai;34]tM8#a3)U$bR;V9 `koI&]thP,b00Y*'$J+jHWYWsV4X0?LN6RSf[eEorQP;b%*?N-`QZ]Zf@SR"cHZ=3#0?!,e]>tm hZ&*~> k5YJVs8!)up](9l`RC,s]$g;r#fEit7QW:I&I877"CKg3dE_^7%a2u7^9*lfO-#g-[(sf,qofMl `Oi:EPE(`kUq":_b59BgbJC0)5rg_C&H)S4,BDGfd*;O5%Eli/Z`L7<`m<&[d*Bo6bQu;Qrmf*6 i;`iSroX62~> k5Pqcq"jj^p](9l`RC,s]$g;r&]:f(7SHrZ5rg\R.3<3Ue^;\@bRVhckaQNQQ aNWTBs82fXs*t~> k5Pqbp\Oa]pAb0k`RC,s]$g;r&]:f(7W#6(TUV7,H!>XJe^;\@bRVhckaQNQQ aNWTBs82fXs*t~> hZ!fY`RC,s]$g;r#fEit7Q`FN'*nI9"CKd2dE_^7%a)c&Sp+4/8P)ruM5J/4r6,Yoa0_4\>>.R[ ;Iu)p^rcJt#g:&`HU9bM+;+PP"Z1nbf$M\@bRD\8]r?a/F,d6@bg"E/bRV_Wrmf*6i;`__f%Sip g#i hZ!fY`RC,s]$g;r&]:f(7SI#^69-eS.3E6Te^;\@bRVe7]:O4I9M%fVCPeq*b5BHnbK$uWHWs07 8lB5bXh`'UbS&+8WI%35>tmU@0e=S hZ!fY`RC,s]$g;r&]:f(7Vo3)TU_=-H!G^Je^;\@bRVe7]:O4I9M%fVCPeq*b5BHnbK$uWHWs07 8lB5bXh`'UbS&+8WI%`pY,7k[P_EsdQe9m9c2PoqbK%;qL37QsX2iH/c2,WoaQNQQaNWTBql%ll e[X)js*t~> k5Pq8dGNj0aT);=`RC,s]$g;r#fEit7QiRR'*nI9"CKd2dE_^7%`uGbH k5Pq5cedO+`W,u:`RC,s]$g;r&]:f(7SI)a69-eS.3E6Te^;\@bRVb/Wd7oO<)HCa<,7N,aSj k5Pq/akPRn^]4?4`RC,s]$g;r&]:f(7W#<+Tq%F.H!G^Je^;\@bRVb/Wd8)\>?Y-4Ap]`SaSjR?WDBEMD(7cclC3$H^)bMJ\!=S>*mCprjYtkPj#Q dFm4-kV=_2TAeYA]`%j.r;?Nnrr2KgJ,~> 
o)Jah)#F()q>UEkal2I@Rr!,0hWWnk`RC,s]$g;r#fEit7QiRR'*nI9"CKd2dE_^7%`c)K?t"qJ Htu8+3I8E]rQGeqaLd@?t,F+ed*7m:F3L5(MBIXHTnI:LFis*t~> o)Jah)#F()q>UEk`S]n8Q"4]kgumPf`RC,s]$g;r&]:f(7SR/b6THnT.3E6Te^;\@bRV\'Rp]oq PDXcs:/lhZao0EobK.;RAQk@`MKic2@#qn?qog2+_5VcQ:gHd]1GLI-<124^b/VE>`4i@:Cfu"B o)Jah)#F()qYpNl]\DQ$N)j/:f]Ci[`RC,s]$g;r&]:f(7W#<,Tq%F.H!G^Je^;\@bRV\'Rp^EB XJhhWNJ`n5ao0EobK.;RBOn9IVk037LRl#tqog2+_5Vc`I'0?>R?WGFG,3U6b/VE>`4i@:Cg`F> USYoPprjGnkPj#QcBVOM?6pp?7qZg[Fbb?krd4`Vb45L^~> o)BX-^QY%RR@KkGNgQ6#@66bZQC2UD`RC,s]$g;r#fEit7QiRR'*nI9"CKd2dE_a8&'MqsN_a,S Q%<[+'./1IbPfWqbK%#<h_5VcJ3]%rkr;ZsdMnlg)ri6I,TS[P]:j-Ud REEe!q90_tkPj#Qc)?5VMEsWkD2sg@OIMAnNfT-EdII6e~> o)BX-^QY%QR%'YCLm"!a=>;a4O-FG6`RC,s]$g;r&]:f(7SR/b6THnT.3E6Te^;\AbR_n9Z&VUr Yc=.J>X1i9`QS5(&'N#$HrjISX."QM79YqmbPTKqbJC0)8Pi\e5r18K-rlGG\$#_2%&onVDGF/c M*@WUc-?(.)9D"?e]?"1H&I854"k-"Ner[_OH,<\N-Wh@s*t~> o)BX-^QOqNR%'Y@G(b[(7iNR:K8=0n`RC,s]$g;r&]:f(7W#<,Tq%F.H!G^Je^;\AbR_n9Z&VV2 f[S-_Y*b0:`QS5(&'N#$Hs::9e]bh7S;O,ZbPTKqbJC0)=F@9KTUD%%G^Z7#\$#_2%&onVDGFK6 `0ddOc-?(.)9D"?e]?%.COU@N,9'U-J9cU1O,f0YMg<_?s*t~> o)Adjf=h-u'>hbUXKo3hde:lj_lnY]4Z"Dt#Q4W*?eclbJC0)696qG&H_q3'f^,eDIR!^@:*,15;Z,R 3!$).d*Bo4bS\Farmf*8ht6K\VlHN'h=^1%rVulprVZZas*t~> o)BX-f=eu7^VI_+OJf7f@js0;Z`]AL`RC,s]$g;r&]:f(7SR/b6THnT.3E6Te^;\AbRht8XG'/M MgfGD3@RFgcd0l8bRht:Y]<4tL0=cd0l6bTb6HWI%35>tmUB1,(:ZHA?BI?t!JN=A1q9 o)Adjf=h0u'YqQ(J=NI9<>6?FX/1^5`RC,s]$g;r&]:f(7W#<,Tq%F.H!G^Je^;\AbRht8XG'/i `P8d\P^%AMcd0l8bRht:Y] o)Jah)#sU6rVliqcfOKITmD6Yg?[ho`RC,s]$g;r#fEit7QiRR'*nI9"CKd2dE_a8&BhtmKLSF2 67N`Q!&1"mdEV[7&Bi%sEC`8n6RN?H!*>uEdE_[6$-U/aHU9bM+;"\S!!FoWB2Lqm4q'M32Eb2I ('(SgdE^#5bRV_Wrmf*6i;`c-l0IZllfS4t~> o)Jah)#sU6rVliqbiA$BS9'"Bf]qJj`RC,s]$g;r&]:f(7SR/b6THnT.3E6Te^;\AbRht8XFs&D G%aiC0-`lZda?>=bRht:Y]39+GA0r=1*pJ0da?AtmUB1GUX\C2?d5rB:WO2` o)Jah)#sU6rVliq`8Be/P@eTieEGf``RC,s]$g;r&]:f(7W#<,Tq%F.H!G^Je^;\AbRht8XFs&d ]=bRht:Y]3NJ]frB:WO2`F'O [%CR;eBuRdqTKGjkPj#Qbi&O"d,sQUg#qMtJ,~> k5PqBg?%GOe,TIH`RC,s]$g;r#fEit7QiRR'*nI9"CKd2dE_a8&BhtmKLS0l/JeZm!&:1sda%j9 
&Bi%sECW&V/ee k5Pq@f]D5KdJs7F`RC,s]$g;r&]:f(7SR/b6THnT.3E6Te^;\AbRht8XFs&?B3.M^.Nh*SeC)YA bRht:Y]3<*BNRVX/L"`*e^MhAbTb6HWI%35>tmUB1G^dT k5Pq;eDfK=bl@_A`RC,s]$g;r&]:f(7W#<,Tq%F.H!G^Je^;\AbRht8XFs&bZ`BjiIq!&leC)YA bRht:Y]3NJZ`KjfL1bn=e^MhAbTb6HWI%`pY,7k\R$a5.Q%NjW>[:ZC?XRMmV65_S^t$lYc2,Wg aQNQQaNWT!s*t~> hZ!fY`RC,s]$g;r#fEit7QiRR'*nI9"CKd2dE_a8&BhtmKLS'_,nL4[!&C;!e'@s:&Bi%sE_&/M ,n0kT!*Q8Oe'@m8$-U/aHU9bM+;"\S!#5SC)DFutCiFNEJU_]2*WR_%f$_jgqTKGjkPj#Qbi&O$ qY^ hZ!fY`RC,s]$g;r&]:f(7SR/b6THnT.3E6Te^;\AbRht8XFs&=?Vj!F.3CmQe^MhCbRht:Y]tmUBr\G9H4%NLqJr5PLQC=5&D+H':^"1WWc2,WoaQNQQaNWTB s8)ZlrVQNTs*t~> hZ!fY`RC,s]$g;r&]:f(7W#<,Tq%F.H!G^Je^;\AbRht8XFs&bYGe+\HsUBce^MhCbRht:Y]7f$hqBbR)J/WI%`pY,7k\rg4FgS>*'o^r475b08)M[[hYK_Ud/\c2,WjaQNQQaNWTB s7uZl!W)`Vs*t~> k5PqYn+?8Am/R+a`RC,s]$g;r#fEit7QiRR'*nI9"CKd2dE_a8&BhtmKLS'\+q=_U!&C;"e'@s: &Bi%sF%JAM,7FSQ!*Q8Oe'@m8$-U/aHU9bM+;"\L!"K&6%29Wq$NL/`][kNVc2,WtaQNQQaNi]8 o#R]-UUo:SmHjiPrVl9cJ,~> k5PqXme$/?li7"``RC,s]$g;r&]:f(7SR/b6THnT.3E6Te^;\AbRht8XFs&<>u!R@.3CpSf$hqD bRht:Y]EK-?;E^>.j8H(f$hqBbTb6HWI%35>tmUB1GU[:.k)kp.4Zr&2Ea>m3%Q7?^XgiYc2,Wt aQNQQaNiZ6n\q6"T"!MJm-FZNrVl9cJ,~> k5PqVm.0`7l2Ue^`RC,s]$g;r&]:f(7W#<,Tq%F.H!G^Je^;\AbRht8XFs&bY,7hWH l2M@lr;FXSd*]nNrVZT4e!>3hde:lj_lnY]4Z"Dt#Q4W*?e.=fBp#cMYlmbJC0)696qG&H_q3!W`B.q?[N8#6b//!%>G0da$,6bSnRc rmf*;f$`(42E2qThUfHQo(2GAnFH,Frpp)>~> l2M@lr;FONcd9VFrVZT4e!>3hde:ls_lnY]9jLR_3AN6:+)'9VdE_a8&BhtmKLS^B:-LNa+%)pV e'@s:&Bi%sF%f&0:H^H`+_7Y)e'@m8&BhnhHV%%>9gLWm0eXq$,9d%.%Nlo>,pt/l-6G.Zf$_jg qTKl!kPj#QdF?LjkU\>'SDD`+[ds%\nF6#=q>TsbJ,~> l2M@kr;F:@b0%H.rVZT4e!>3hde:ls_lnY]D6Bh2S^P)QdE_a8&BhtmKLU'dVOs0#?"L.h e'@s:&Bi%sF&QJMVk00*@sbN4e'@m8,0Rg%HWYZuVP'BFP`9p%>#@pb92&/`?tj_0Ea2P[f$_jg qTKl!kPj#QdaH4]jVnpTsbJ,~> o)AdirqHEc'`J%+W6a2dIOI!mnCl.Y`RC,s]$g;r#fEit7QiRR'*nI9"CKd2dE_a8&BhtmKLS'\ +q=_U!&C;"e'@s:&Bi%sF%JAM,7FSQ!*Q8Oe'@m8$-U/aHU9bM+;"\S!"BQ*L9Ie_XK/A"PCI-h rW!).f$_jgqTKhukPj#Qaa,d=/5/l:3Bg8JAk+L+1,:KcWq$+>~> o)AdirqHEc'`J%+Uu!R@.3CpSf$hqDbRht:Y]EK-?;E^>.j8H(f$hqBbTb6HWI%35>tmUB1GC@4:lVcdYck44XI+`_ 
4Wk^U\(9!Qc2,X%aQNQQaN8KK734fd7kHJd=^X]O/MJk2"KM"lJ,~> o)BX,rqH-\p\k*ePJu)=C^:SZma]5E`RC,s]$g;r&]:f(7W#<,Tq%F.H!G^Je^;\AbRht8XFs&b Y,7hWH nc'J]2a^#%6:P.&AR._o9MJ#F>nk5re!>3hde:lj_lnY]4Z"Dt#Q4W*?e.=fBp#cMYlmbJC0)696qG&HVk2"X8'Kl0%.,i=+Sqd`R1V! !'IZda$,6bSnRcrmf*;d`]qp2`W1Sf?q1=kj%BkjQ#@prpp)>~> nc'J]2aTl!5t"jp>u`mH6UX$q<>3hde:ls_lnY]9jLR_3AN6:+)'9VdE_a8&BhtmKLS^B :-LNa+%)pVe'@s:&Bi%sF%f&0:H^H`+_7Y)e'@m8,0Rg%HV%%>9gLTk0.Ao1j6>dgio8qRf[@s' /Jo#.f@%shqTKl!kPj#QdEoqXh^pK"QdsBiZfgW;ioK.]p&=O^J,~> nc'J]2F'Po5XSIT8O4OG/M8=l5nh2Ue!>3hde:ls_lnY]D6Bh2S^P)QdE_a8&BhtmKLU'd VOs0#?"L.he'@s:&Bi%sF&QJMVk00*@sbN4e'@m8'$J+jHWYZuVP'?BN-f-!jm)'kro+@Dg!n o)BX-dBg o)AdjdBih\'XtTdNhWGO?n4$9Wi(R0`RC,s]$g;r&]:f(7SR/b6THnT.3E6Te^;\AbRht8XFs&< >u!R@.3CpSf$hqDbRht:Y]EK-?;E^>.j8H(f$hqBbTb6HWI%35>tmUA1+jt8Ja)I_j5AhPh:gH, R5kul]@kZZc2,WuaQNQQaNiZ5nAq)jQF5N>l0/*Grr2iqrpg#=~> o)BX-dBg9h[C3TXI?pFs;&1*BTUgYk`RC,s]$g;r&]:f(7W#<,Tq%F.H!G^Je^;\AbRht8XFs&b Y,7hWH l2M@gqYSCNa2l-:qtg60e!>3hde:lj_lnY]4Z"Dt#Q4W*?e.=fBp#cMYlmbJC0)696qG&HVk2'dIk[i7ZW.dF-Ijc,Z29!!'^feBcD9 bR)ARrmf*6i;`iOrqc`kqr[p/~> l2M@gq>81G`Q#[0qt^0/e!>3hde:ls_lnY]9jLR_3AN6:+)'9VdE_a8&BhtmKLS^B:-LNa+%)pV e'@s:&Bi%sF%f&0:H^H`+_7Y)e'@m8,0Rg%HV%%>9gLTk/LEE)iS`D=da?LkdET_&:(RiKgsjZp qTKGjkPj#Qbi&O$p\Odaq"s4RJ,~> l2M@eq>7q:^VIClqYC$-e!>3hde:ls_lnY]D6Bh2S^P)QdE_a8&BhtmKLU'dVOs0#?"L.h e'@s:&Bi%sF&QJMVk00*@sbN4e'@m8,0Rg%HWYZuVP'?BM0*$^iS`D=da?LkdET_2D_`WWgsjZp qTK8ekPj#Qbi&O$pAXmhp\O%PJ,~> k5PqVm.0`7l2Ue^`RC,s]$g;r#fEit7QiRR'*nI9"CKd2dE_a8&BhtlK17p[+qFeV!&C;"e'@s: &Bi%sF%JAM,7FSQ!*Q8Oe'@m8$-U/aHU0YK+;"YR!!OTggtL9)r6>YmaJ2Q3!5@4;ccu=1#0?!, e]>tmhZ&*~> k5PqUlgjW5kl:\]`RC,s]$g;r&]:f(7SR/b6THnT.3E6Te^;\AbRht8X+Nl;?;EaB.3CpSf$hqD bRht:Y]EK-?;E^>.j8H(f$hqBbS&+8WI%35>tmUA1+ah2Hed?)d/;/rbK-Vf%h-8.eBcD9bQc/O rmf*6i84Ka~> k5PqSl1"3-jo>AZ`RC,s]$g;r&]:f(7W#<,Tq%F.H!G^Je^;\AbRht8X+Nl`YG\"YH hZ!fY`RC,s]$g;r#fEit7QiRR'*nI9"CKd2dE_a8&BhtkJOVd],7anW!&C;"e'@s:&Bi%sF%JAM ,7FSQ!*Q8Oe'@m8$-U/aHU0\M+;"YR!!XWeg"4Wrc2Gldb6YFt4$CX;da$,6bRV_Wrmf*6i;`c0 lg=)um,n=u~> 
hZ!fY`RC,s]$g;r&]:f(7SR/b6THnT.3E6Te^;\AbRht8We!W8?V`mD.3CpSf$hqDbRht:Y]EK- ?;E^>.j8H(f$hqBbS/19WI%35>tmUA1+jq4H.gfsc-?42s2t\VJ4N2Og!\0jqTKGjkPj#Qbi&O" f]qhihs9G)J,~> hZ!fY`RC,s]$g;r&]:f(7W#<,Tq%F.H!G^Je^;\AbRht8We!W]YG\"YH k5Pq\nb2\In,NFd`RC,s]$g;r#fEit7QiRR'*nI9"CKd2dE_a8&BhqhImcI\,S("X!&C;"e'@s: &Bi%sF%JAM,7FSQ!*Q8Oe'@m8$-U/`H9jSM+V=bS!!OQdf@Jj_536VdFHUhq90MnkPj#Q cJS?e`N?DG]A<5Nn,E@drpKf:~> k5Pq[nb)SFmf3=c`RC,s]$g;r&]:f(7SR/b6THnT.3E6Te^;\AbRht7W..33@8K0G.3CpSf$hqD bRht:Y]EK-?;E^>.j8H(f$hqBbS&+8W-V$3?;3^B1+jq4H.UWoc25]jc,?ZLRF9X4cM>ZtaQNQQ aNi]9o>7N1VmtLTn*^2Trr2?cJ,~> k5PqYn+?8Am/R+a`RC,s]$g;r&]:f(7W#<,Tq%F.H!G^Je^;\AbRht7W..3WYc"+ZHZtaQNQQ aNiZ7o"1WlTX!,Am-FZNrr2?cJ,~> o)JahrVI<.s8:C1mFe=gM4iJfkP3$FQ?Tpgp<3f[YC]5+1aE_>r;Zt-f@A9orlbqs_52HF5W^8& "98EWU=Rf=rlbqs`NXAS3Ar)j!<<+&W7KGCr6,Jj^SZ3@4?+Ms!WE'&,&l/ad*;I3$dI#Ge^i3q b/hWFbP]QuaQNQQaN^fCQ]j[%3,t4OPE_l2R/WKSL"5_nJ,~> o)JahrVI<.s8:=-mFS"ZK:CEYjnHaCQ?Tpgp<4,dYC]5<>u3mK1+t'uB\1e^cMl$!bJC$$6W.Aj 5;al36`/G`cMl$!bJg]+9L`>a4YeN2A?4;1cMYm!bJ0ou85`hj5r:>L-T68%e^2SLaijV+'?KA9e]?%7JWbpL3\>',P`:a!RJiKRL"5_nJ,~> o)JahrVI<.s8:%"ldD/?Fct,;iqLF@Q?Tpgp<4,dYC]5\Y,J%aQAp/pHe6fqcMl$!bJC$$6[5l7 T9ae/AZaP4cMl$!bJg]+;dcf,Ss=n;Ha;5OcMYm!bJ0ou=+%3LTUD(&F^9gTe^2SLaijV+'?KA9e]?(3F+o&f+rOO8LOOiJR/WERR"a)\s*t~> o)BX-dBBm`[(!W[QDgmhBf8+`YcET=`RC,s]$g;r#fEit7Q`OR'*nI9"CKd2dE_a8&Bhh_Glcd:"\ _7@%d`5p.#bSeLbrmf*6RV[Fk6qBRLGCY9uK6;6WFE1L'nc++~> o)AdjdBEPU'XtWdO.rSQ@4O-:X/Ld3`RC,s]$g;r&]:f(7SR/b6THnT.3E6Te^;\AbRht4U3An) @Sf9H.3CpRf$hqDbRht:Y].j8H(f$hqBbS&(4Ui].+@89*F1+jq4H.UTnc2,Wlc-Ob_ b/(Wt]#;\5rQH/#kPj#Qba)[]Ai`+6<,[&/ItDc`EcZ5&`ps(Z~> o)BX-dBBj^Za[HVI[?Y";AL3CTq6hn`RC,s]$g;r&]:f(7W#<,Tq%F.H!G^Je^;\AbRht4U3AnT Z)F:\HX13bf$hqDbRht:Y] nc'J\2F9f"5t+q#AR._o9MIuD>nb/qe!>3hde:lj_lnV\4Z4T"#Q4W*?e.=fBp#cMYlmb.XNi5sIFT&HVk2"X7p;e^2S:bl>onbIsdBS >Ej:b5KO&aQNQQaO/c!hX3d$19'Epai=/Ur:]p[rVc`ds*t~> nc'J\2F0\s5t"gn>uWgG6UX$q<"m3he!>3hde:ls_lnV\9jUX`3AN6:+)'9VdE_a8&Bhe\F?a#> :d-`c+$ugSe'@s:&Bi"qED&c.:d$Qa+_7Y)e'@m8'$@n^E_'2>:-g]l/giQ&f@A6mq98l`#g9oY 
S"-=e`QJ/')T_+@e]?.lgth?Y4X[)fi5NS/rquKbp\k'io)F4~> nc'J\2F'Po5XJ@R83e@E/M8=k5SD#Se!>3hde:ls_lnV\D6Kn3S^P)QdE_a8&Bhe\F?bej Vk99&?=g4ge'@s:&Bi"qEDg2JW1K9+@sbN4e'@m8'$@n^E`[drVkBHCMKN3\f@A6mq98l`#g9oY S"-=e`QJ/')T_+@e]?1kf@oO?,o3*3h7^>prquNcp\k'io)F4~> o)BX,rr2imqu6WmVU!oaIOI!mnCl.Y`RC,s]$g;r#f<]n76NRV'F4R:"CKd2dE_a8&BhbXE'6KJ +q4VS!&:1tda%j9&BhtmCdgBJ-4BnT!*Q8Oe'@m8$-KoQDF$QL+V4\R!!OQcf@A6mprj&e]qKU] C43+qaSj o)BX,rr2imqu6WmTZc$VG9&,In(GkS`RC,s]$g;r&]1Z"787,d6THnT.3E6Te^;\AbRht2ST$_r @88sB.Nh*Te^DbBbRht8XDCHuA5PKF.j8H(f$hqBbS&(2T5I>/@80!D1+jq4H.UTnc1oKhb.XKl Anlao]u^/r%`mi4e]>tms80(Og"F-lk5Tr~> o)BX,rr2imqu6WnP/Yutms8/bAe^_1Yk5Tr~> l2M@lr;Fg\e'lL]rVZT4e!>3hde:lj_5_iL5<:/+#lO`+?ImZScMl$!bIa0X4$knL$j-J00qd"D cMl$!ah=?T5s@gh$ip>.=fBp#cMYllah"$Z5X.1K&,cJ3,&l/ad*;C1$Hg>fHqJTr;3/H7rQGSh kPj#Qbi&O$qu6Npr;GdXJ,~> l2M@lr;F^WdF-+TrVZT4e!>3hde:ls_5_iL:17$f3AN6:+)'6UdE_a8&Bh\RCHGa.9ft6_,"/3T da%j9&BVVY?pmV(<';ue+_7Y)e'@m8'$7_UCeJ&@90Y3f/giQ&f@A6mq902g`N";XLP'N4^tms8Vuqr;lfok5Tr~> l2M@kr;FLKc-=,>rVZT4e!>3hde:ls_5_iLD6U"5Stms8VrnrVcWkk5Tr~> hZ!fY`RC,s]$g>s$Hp2_GX=YW-PQgerW!(-f$r*mrlcY1\^-bfe/GY),n9 78tfH!<<+%Vq'5@r6,Gg\XR=u4uF>jr;[!FE7`Xec2,WmbK%;[?93L:8"e#Mc2PokaQNQQaNWT! 
s*t~> hZ!fY`RC,s]$g>s'$J%gGX>28;F+>[9rc1GLL02L3p)d*;F2%*Q])P?p>4H"X%tcHZ@4#0?!,e]>tm hZ&*~> hZ!fY`RC,s]$g>s'$J%gGX?;QWM,iKO+(Pcf$r*mrlcY1\BR'*9XR?`YQ?AR4`d*;F2%*Q])P@$M_]VW'ZcHZ@4#0?!,e]>tm hZ&*~> hZ!fY`RC,s]$g>s$Hp)VEBZcR-PQgerW!("eC2dirlc_2[ZFlT68]bk!rr hZ!fY`RC,s]$g>s'$Iq^EB[N9;FSeC2dirlc_2[ZFlT@pM\h2(g:@JB#OG`PfEhNFk:: >'X+11+ab^W6ru:bfg"0'$7YOB1cH88Nnpb0.Ai)eC)[eqT95d]rH3J=+s[X[aEIIrlbknkPj#Q bi&O![b0Bj`7jARJ,~> hZ!fY`RC,s]$g>s'$Iq^EB])]WM,iKOFUtfeC2dirlc_2[ZFlTUopPbR#?33PfCY[`PfEhNFk:@ EP/ k5PqCg?.PQeGoRI`RC,s]$gAt$d6Z"QXr0I5:@0M!WE'%4OVX'c2PmAag6k.1dX#<$3C2-)Hs/: Uo1)YK5!t?9k[Zi+3 ^=:QUrlc;%kPj#Qd,!jGi+Qk0YiFuue,97Fr;?Qorr2KgJ,~> k5Pq@g#_>LdJs7F`RC,s]$gAt'?eM*QXr0^AQ(oU1GLF+9[_>7c2PmKag6k.1h0c^4#AWB4)1dk Uo1)YK5!t@/8Y8cMl$+aQNQQaO&`'jkaBT;lep@e'\*nrqu]ns8N#gs*t~> k5Pq;eDfK=bl@_A`RC,s]$gAt'?eM*QXr15ZDjOfQ]Zo:CXUVVc2PmKag6k.1o7=MSXGOjI>k2% Uo1)YK5!tCB;`; o)Jah!;lcl'`S.0d,jTJTmM!sW%KZP/)&HMdWh,]L S=>t8R$EPS@6lhj6PE&5f$_kCbSeLbrmf*7Vh'qd5t!bMN093^Ru`SjO,eJ4nc++~> o)Jah!;lcl'`S.0c/\-CS90(Cf^%Sl`RC,s]$gH!C!4Z>_4lHX1Lb#r69-hV/LNs/`P]X7b0%f0 KiBm=>"1M/0eYCXLO+&cFD+cH4^*5eAk4m:0IgIr['7*WaND`M`2n_p8nVR)4u>#K/2h7PWh,]L S=>t8R$EPS@6ll%B/j;Bf$_kCbS8.]rmf*7UNhZI2`i^'L5^qGRZ o)Jah)#O1,qu6Wn`Sg"2P\4cke*,]_`RC,s]$gH!C!4Z>_4lHX1Q&Q=TU_C5LimOV`P]X7b0%f0 KiC"-XJ25QPDk6PM0a8eFD+cH5&$\)ZD3nUO+)q]['7*WaND`M`2n_p@%aa#T:)%.KlW$ZWh,]L S=>t8R$EPS@6lrEXFcDUf$_kCbS/(\rmf*8SnWUn,U47,G^jsdQ]D$(!I3#^s*t~> o)BX-f=AQ-]tqV*R'![(Cc"4a\[%@Y`RC,s]$gK"%a2u6^9EB'5qc5,-4pF_rW!K\\?;p<]uA(5 XFEu&67iiSrW!K2%UE749hIc85"TC(<%8F^rW!TlH_[17]#Mn9b.XEl8jH<<*";rJ!#Z/4R=TBt E,fi9DJa,q;F3/uGV0>&daupmrlc8$kPj#QbEc[_Ajo9X=`8S7J9u'GCMRWY_X[YV~> o)BX-f=AQ-]YVJ(P,PRkA199<['#JM`RC,s]$gK"CHH[F]C[b1,1@ZKqk6A]#Mn9b.XEl8ka278Nemc0eY4`R=TBt E,fi9DJa,q;F30!N`RfEe(<$nrlc8$kPj#Qb`Q%J>Wb51:M4`mH[9F@Bkh o)BX-f=AN+]YMD%JXrX<<>6?FXJLg6`RC,s]$gK"C]uA(5 XFF#4VQ6DZQ^*_pN-eo+9hIc862$8LGo24&oa$Cj'Q-BPD*Q_X[YV~> o)BX-^Qt@ZS"$"HNL-'!?opYYQ'cCA`RC,s]$gK"%EcSpSU=6r0fE8u)[QTE!"^6(K6`$>[)'YW 
G=O_\-4'_O!"K)V4@;S0:esn\5uM2!"8r34%R>#]H&BbgbJgH0=?/`O2A-5l!!Ofj o)Adj^R"hf':l"UOd(o.0fN!kP\Spke!>3hdeh6ib/:QVHr)p[2fiG"3]&TH0J5VJK6`$>[)'YW G=Y,3;*$Hd0JP=?8lSV^:K^q4BOl%/3&EBG0J5._I"d9>b08&JWI@r0:jQG62_m-C0J5tA>>.X] r_*8`%89]"<**UQ4!&.jf@%tDbSeLbrmf*9h o)Adj^Qnbd':bh?KSP.L)^%FuLM#5Ye!>3hdeh7'b/:QVHr)p[32!OPS=#S*O,8IVK6`$>[)'YW G=Yc5W1B07OHZ$!Q%NaT>\SD?UTCMuQ'R`$Oc"RAOd2K=b08&JWI@r0C:paCS!K>'Oc,*MB2h`) :f1(d:JtD&>[i)lO]mFlf@%tDbS8.]rmf*9h!41JH?jM1g[3eMr;Z`pqu?Zcs*t~> o)J^g(]F:0rr;o2gu[R[>%3EMjP0@He!>3hdeq;ubK7GmKO@565=eZs%KlV/+VRdj?Y5(o^;?Ub 76Ws[";DCJ!!*0;-pp[*Lt2fsuS!"fr&83pV.c-*tnCe6NR8g=TCqu?s?8NoR;:f:6k<"2o= BPD-b-jKrdgXjm!rlbknkPj#Qbi&O"b2VLCe)]ZkJ,~> o)J^g(]F:0rr;o.g>q4P;-J\4jOs4Fe!>3hdeq<`bK7GmKjmP>6W%bt4YncF0.nh:B6.[$MlaY0 U2i[o@nejG3\E$>1dt>O@qKUp>tA(QR4BpE0.e\"1.bSLFO&.S_OG@52cafl3&:Id#qdks?rL'0 ?!gt o)J^g(]F:0rr;r$e`#A<5t?:YiRIM=e!>3hdeq k5Pq8d,3a/aT);=`RC,s]$gN#$HpN"Q?#o*@:3.jq#DT@&3i k5Pq5cJIF)`W,u:`RC,s]$gN#>0G!rQ??GADfKo$0eFn*.4?Sl.Q(C0=E(1U_k(^4 k5Pq/akGIl^An63`RC,s]$gN#:#c*h5.IX6$G:II?2B:72nbJ''K9imp_P_4I4rd"itJW#np[(3f]r4NKM\Zh`T <_A5)f[7tCbQc/Ormf*6i84Ka~> hZ!fY`RC,s]$gN##g: hZ!fY`RC,s]$gN#%a2r'Q[Ngm@nA@'+s7pJrZ+'?*?QFK+X*6!`gLI)>%0iOCRn?J+!N$[-mp8[ (-[l=i5[&_*uu77&/I6FakbLnbIs*Y>u37),9J!**=*].+XAKf/h\mV0`Nd5/Li"a'/b[)f[7tC bQc/Ormf*6i84Ka~> hZ!fY`RC,s]$gN#>0G!sQ]S)SZ)!G-Am\bF='&F&=']Za`d0JQ7Mbh^gqbIs*nO/.h`BjOtF=BAR+>[qPqK8GMGrJqAHMhc^r9fGb1hq6?$ rlbSfkPj#Qbi%@XJ,~> k5Pqdq>:'bp](9l`RC,s]$gN#$HpT.Y^eSN)Aa),p`)12()7PuS(5QA>rtkG%g#S)]P\c##mU\E &MeL:iT/k@EYAZf(HmYcf\G?;e'?.SX]HDW(E;ob!>lRK'`JgO)*sD@f$M\@bRqqZrmf*7i;<;g aNMZEhu!3Cl2Q8~> k5YJVs8!)up](9l`RC,s]$gN#%*Qf0Y`)UI3AN*;/M6oPs""+#2^g""-_])2O^1%^@QuASXhT'4 +W_[F+X91!\DlT+c[$fR+!jF<]@l#sg=+?j_l[fB3A<-=/c%"'/M8\10JP11r$i%&-mg/b8sS5` d*;R6&]j/7e]?"mqY7qHai;6TqtKjLs*t~> k5Pqcq"jj^pAb0k`RC,s]$gN#%a3#2Yb[_BG@t&Z:JFMUrCTRQ9hS&K9WcjcO^1,$G/. 
k5Pq0akGIm_#OH5`RC,s]$gK"$-UK5\tP^jW2nhNlSn"?YcXqLfYiMB2,lXG!\02pj1BP,5!NhD kjI`th:U9/FZk&ekj@^!in2`+c-4AQXI,Nlk3;X+s6BXK"3[jJX8T+*XfekYk2t@Fc2GiiaQNQQ aO:(B&bIq>+fFM[_S?!IrVQKjs8N#gs*t~> k5Pq-a4].f]`8$1`RC,s]$gK"$-UK5]r@g+X01CUlSn"AZ`pUXfYiMB2KE7i.RKHdj1]q=7n@Wa kjI`th:U9/Hq&V'kj@^!in2`+c-4AQZD"&0k3;X+s6BXK"3n*RY5PO0Yd(LdkN:IGc2Gj)aQNQQ aO8epg@HdG'r'jH^:X:ArVQKjs8N#gs*t~> k5Pq&_:@)R[K$:*`RC,s]$gK"$-UK5_n!%VZF&TclSn%G\[T&qfYiMB339\Ul0[g"in2`+c-4AQ^9k/bk3;X+s6BXK"4+H`[/IB<[^Wd#kN:IGc2Gj)aQNQQ aO8_if('^m!h89([^,i1rVQKjs8N#gs*t~> o)Jah)#*aupAP!cWmKMfJ1`g'n(Z1[`RC,s]$gDu#L1QAf\PNHk5OK;j=93Hg!dZ\7ke")$j%2j jQ>O^iT'%_ki_$ag!e?reC`U;kNCj]g=FWrc2Pojc-Oeghr=4fs5X+>rT4(A"m+^ue'H;;bSeLb rmf*3I5t4]9M\GH6W@T19JnXd3&M,Hnc++~> o)Jah)#*aupAP!cV9R`\Gp=qYmb5nU`RC,s]$gDu#L1TBg"kWIk5OK;j=93Hg!dZ\7ko9t4Y&4? jQ>O^iT'%_ki_$ag!e?reC`U;kNCj]g=FWrc2Pojc-Oehhr=4fs5X.?r8mt@"m+^ue'H;;bSeLb rmf*4Ft-*16:OBt3)!Re9/JI`2DbiEnc++~> o)Jah)#*aup\k*eQH.VCD[mOlm+0/F`RC,s]$gDu#L1TCg"t`Kk5OK;j=93Ig!dZ\7l.f^Spk4> jQ>O^ioK7cki_$ag!e?reCi^>kNCj]g=FWrc2Pojc-Ohihr=4fs5X.?r8e1GkN1[Xe'H;;bSeLb rmf*5E"k,:/2/ap+>u_g75H_V1c#QBnc++~> nc'Ju?"Rb`B5;dDH?Npu77^3dFudjee!>3hdeV)jc-F\eg"ZJ\q:kr).GNM)bdDFZ3EA$G!=!#b hr!;JiSrkUh:^?,cd'kaf%]$;gt:0*d*Bo7bQZ8;dF?k'n_4*"h>5t3h:pT3ccuC3)T_+@e]?(P XgGc'4t_]%\#?4G`P]I,^UWG*o)F4~> nc'Ju?"R_]B5)X;F)P>S4$Q/=D`H"\e!>3hdeV)jc-F\eg"ZJ\q:kr).GNM)bdDF\5BpKs/L5;e hr!;JiSrkUh:^?,cd'kaf%]$;gt:0*d*Bo7bQZ8;dF?k'n_4*"h>5t3h:pT3ccuC3)T_+@e]?+N Vm3`d1a@U[Z_O88`P]I+^UWG*o)F4~> nc'Ju>\%DWAnZ:!@U;\`-7CT??8g'Ie!>3hdeV)jc-F\eg"ZJ\q:kr).GNM)bdDF_::m^#JmaPi hr!;JiSrkUh:^?,cd'kaf%]$;gt:0*d*Bo7bQZ8;dF?k'n_4*"h>5t3h:pT3ccuC3)T_+@e]?+K SZ&b7)\7+nX-T'k`59:(^:<>)o)F4~> o)BX-`hB6+V5:,oOI`#9A2uk[T:^;_`RC,s]$gAt"3o-=deqSjdf%W,dEfh5Bgb9d1("$^8As-4 f@U#S"P;/Ncd)O7$-prIeC;socd'f3bQc>;d*g@ke+M/he,IkoeHF@Ld*Ku6bSeLbrmf*9hXKs[ P*Ls7hsojir;Zcnr;6Kmnc++~> o)Adj`hE@-';r$jQ^aCI0/cdtRrmH)e!>3hdeM#gc-F_br6tMjrR2A+cc3#"7o+834"_[[`n/kp fDaD*f@83ocMl&pcI(+jeC2djcHZ40#0kH?daHUnp!Wlhrm^hq"OkfGccu@2)9D"?e]?(mn*SAY 
OG:*unDrU,s8Monqu6Tbs*t~> o)Adj`h<=,'W%\.G`%QA9,Sd=NK8&5`RC,s]$gAt"3o-=deqSjdf%W,dEfh5BhiBRXdt5YI`^3o f@U#S"P;/Ncd)O7$-prIeC;socd'f3bQc>;d*g@ke+M/he,IkoeHF@Ld*Ku6bSeLbrmf*9h n,NCds8N`2jmhui[&9q0cLgZ'`RC,s]$g;rm`c9c^S,%&8k^`A!<=^UdF$>?ci;;jc25`_c1K6? c1oKmaQNQQaNWTBrp'+Io^;A6s*t~> n,NCds8N`2jRDfeYbS(ubOk<#`RC,s]$g;rm`c9c^S,%+?#We)0-s#mdF$>?ci;;jc25`_c1K6? c1oKmaQNQQaNWTBros%Ho^2;5s*t~> n,NCds8N`2i9p3XWLB?S`UiNn`RC,s]$g;rm`c9c^S,%5K=IqPN,NaJdF$>?ci;;jc25`_c1K6? c1oKmaQNQQaNWTBroWeCoBYu0s*t~> k5PqQkjS!)j8]/X`RC,s]$fW_&B_\W>sV%L,mOGN'qs%%ccs,H#0?!,e]>tmhZ&*~> k5PqPkO.d%irB&W`RC,s]$fW_&B_\W>tAC<:cL-U0;d.Dccs,H#0?!,e]>tmhZ&*~> k5PqMjR)?rhuE`T`RC,s]$fW_&B_\W>ud#oW18lhA['S/ccs,H#0?!,e]>tmhZ&*~> hZ!fY`RC,s]$fW_&BVSU>X(P7*tms8M3PoCM>?k5Tr~> hZ!fY`RC,s]$fW_&BVSU>Y&=38iA@N+dgR&ccs,H%`mi4e]>tms8M0OoCM;=k5Tr~> hZ!fY`RC,s]$fW_&BVSU>Zm]'V4*Bb=es?jccs,H%`mi4e]>tms8M'Jo()#7k5Tr~> k5PqapA+OXo`+si`RC,s]$fW_#KjcR@QuRo')_e5JAfp`c+M79aQNQQaN`W!(rp%87` s8N#cs*t~> k5PqapA+OXo`+si`RC,s]$fW_&B__[@S0g$5r(&<)MlA]d*95I( k5Pq_p%\=To)Jag`RC,s]$fW_&B__[@UZC nG`LerqmK0s8D$Jo[0t>Vm+M>nG:,QQ?Tpgj3.bO_l\2U&dAC8!!IM\f[A$WbS/(\rmf*9]r80E 4?kKQZa6L(`4u;b"2;)%rpp)>~> nG`LerqmK0s8CsGo?XS3UTD]2n+jrOQ?Tpgj3/(X_l\2a5;b)F-6*_J_:[2\]Z_&1kPj#QcaogP Z8+9_I]orIT>\JC]`Z!SjSn`MJ,~> nG`LerqmK0s8CgAo#[eqRAIpomJ4`MQ?Tpgj3/(X_l\3$R$3J]EE58]`7WM_]Z_;8kPj#Qd'o@: W?3)lEi,^tPet/J]Y(q`jSn`MJ,~> o)Adjl.5jc']%X%T=i.jF=oOfd_r9R`RC,s]$fW_"j=rtP[YU0!!NL^bhCOj]Z_87kPj#Qb)&l( 91_fK8m#\FFCS-B;,L(=\+0KK~> o)Adjl.5jc'\qR#RCL,ZCa1QAcbc^I`RC,s]$fW_&Bi,*P\`N2,pFpd 6!Z]>6U*e1?YOFf:/4S\9e3^Cs*t~> o)BX-l.4G;fA,6s_ /P.B?/1N)093#>):/+GX9ImUBs*t~> nc'JP+ttc,/i-(>>#7"?:f'J5:]S8Le!>3hdc/IRbJg`I nc'JP+tkW(/MTe2;F`&k7n5K`7f^3hdc/I]bJg`MAfDR%&e6+minDl+]Z_;8kPj#Qdb<@- o-u=,U?::H]_VR(p%S1YrVlBfJ,~> nc'JP+Y>@9c0ejaX0`\u-e!>3hdc/I]bJg`VL/LQ-1b(JrinDl+]Z_;8kPj#Qdb*%" ner/CS)i82Z24Itp@n:ZrVlBfJ,~> o)BX-o'>T*l0Ia&Tu>75GUtaihp/k%`RC,s]$fQ]%a2o2^h=kk%7?67i7H?#]Z_)2kPj#Qc/ANq ]?&4=^t\e`q>UEorr2BdJ,~> o)BX-o'>T*l0I^%S&!8'E?QiDh9Zq>UEorr2BdJ,~> 
o)BX-o'>T*l0I^&N4I!]AIB/Sg;gMc`RC,s]$fQ]%a2o3`J6u40O::.i7H?#]Z^u/kPj#Qc/8En YJA)n[ab6Nq#: n,NCds8N`2lLX_p\$30HbP(Q(`RC,s]$fK[$dI&6Z`ppXfAPB3cF_:1aQNQQaNWTBrpBCPp@7k= s*t~> n,NCds8N`2kk"Ml[&gI;aS,3$`RC,s]$fK[$dI&8[^3Nbf\kK4cF_:1aQNQQaNWTBrp9=Op@.e< s*t~> n,NCds8N`2jmi#`Xe_hp_Y*Hq`RC,s]$fK[$dI&<]Xbf#g>Uc7cF_:1aQNQQaNWTBrp'.Kp$VJ7 s*t~> hZ!fY`RC,s]$fK[$dR,Kg>:cFgssftc+D1(aQNQQaNWT!s*t~> hZ!fY`RC,s]$fK[$dR,Kg>:cFgssftc+D1(aQNQQaNWT!s*t~> hZ!fY`RC,s]$fK[$dR,Kg>:cFgssftc+D1(aQNQQaNWT!s*t~> hZ!fY`RC,s]$fK["3o- hZ!fY`RC,s]$fK["3o- hZ!fY`RC,s]$fK["3o-.W'b22:\J,~> k5PqHhs'C`g&M*N`RC,s]$fEY!6tGfs38*u)T_+@e]?+nl0-lEG\aVEle:%js8Muqrr;uso)F4~> k5PqFh<=([fDkmL`RC,s]$fEY!6tGfs38*u)T_+@e]?+mki^W:Eau91lIa\ds8Muqrr;uso)F4~> k5PqAg?.PPdf9@G`RC,s]$fEY!6tGfs38*u)T_+@e]?+ljQ4p$?rj2]kgRoWs8Muqrr;uso)F4~> o)JahrVRB/s8CmDo$ae3T<-H2mJ4`MQ?Tpggs#]@\',N,kPj#Qca]UHXZ\p)J?,cET=_K/ZN?u] rpp)>~> o)JahrVRB/s8CgAo$OJ'R]"I$m.nWLQ?Tpggs#]@\',N,kPj#QcaB.:V_^4YGc%C-S%H'+ZN?u] rpp)>~> o)JahrVRB/s8CU8nB@YdO.XP`lM/?IQ?Tpggs#]@\',N,kPj#Qd'8S"SKJseCRgrTO1DXsZ3.G0 i;W o)BX-YBb\NJqJf?Ll.%>=[5ATJ;I7T`RC,s]$cS^)T_+@e]?%BQD($;5r o)BX-YBbYLJV/Z o)BX-YBbVJJ:`H5Dfg)84Wkt0BQR)"`RC,s]$cS^)T_+@e]?( o)BX-Vf$[%GBS7dKS"_rUBa s*t~> o)AdjVf)KW'QnYTI=$6T9/o(+C3`:r`RC,s]$cS^(Wbe=e]?+mk2tH1BN_Fqkge/Zrr)Wgq>UBa s*t~> o)AdjVf)HU'6f"CCMI$4,:59NC.$@ee!>3hdZ)HeaQNQQaO&]$j4IL89;UM-ccl:crV? 
o)J^g(]F:0rr;o*f\ttP92pQ,iRdbAe!>3hdZ)H\aQNQQaNWTBr57CMiQgEEs*t~> o)J^g(]F:0rr;o&e`#VF6;)agi7.D3hdZ)H\aQNQQaNWTBr4q.Hi6:*@s*t~> o)J^g(]F:0rr;qpd,!]00K4%4h9Y]3e!>3hdZ)H\aQNQQaNWTBr41M:h8\43s*t~> k5Pq0akGIm_#OH5`RC,s]$cS^#0?!,e]>tmhZ&*~> k5Pq-a4].f]`8$1`RC,s]$cS^#0?!,e]>tmhZ&*~> k5Pq&_:@)R[K$:*`RC,s]$cS^#0?!,e]>tmhZ&*~> k5YJZs8Duqs8N<%`RC,s]$cS^%`mi4e]>tms8MQ_q"XLVk5Tr~> k5YJZs8Duqs8N<%`RC,s]$cS^%`mi4e]>tms8MQ_q"XIUk5Tr~> k5YJZs8<<&qu?]p`RC,s]$cS^%`mi4e]>tms8MK\p\47Qk5Tr~> hZ!fY`RC,s]$cS^%`mi4e]>tms89:Ug=jEsk5Tr~> hZ!fY`RC,s]$cS^%`mi4e]>tms80(Og"F-lk5Tr~> hZ!fY`RC,s]$cS^%`mi4e]>tms8/bAe^_1Yk5Tr~> k5Pq4c.q+#`;fl9`RC,s]$cS^)T_+@e]?1nh;.Qb5qK"ti5W\4rr)`krVulro)F4~> k5Pq1b1k[q_#OH5`RC,s]$cS^)T_+@e]?1mgYD6T3$G*Zhnm/+rr)`krVulro)F4~> k5Pq*`Rrh`\c;^.`RC,s]$cS^)T_+@e]?1kf%KF:*tOq&gq'rlrr)`krVulro)F4~> o)Jah!;ZWh'`J%,]%Q**N_WW/k2P%d`RC,s]$cS^)9D"?e]>po=):o-91D`dDK^;B>?P39=uj7c s*t~> o)Jah!;ZWh'`J%,[FXpj8nCXZ69I\=B5Vd,>$5'6=uj7c s*t~> o)Jah!;ZWh'`J%-WR9S]IPX$,ini&R`RC,s]$cS^'ZfJ:e]?!f3)VX_.jm&?<*NmG=]kZ+!BIg_ s*t~> o)BX-[Y'HqMi<^bMiWpU>WtMWLlYlo`RC,s]$cS^)T_+@e]?%>O.2P&68itTSX5\ATUq[ERuGtu o)F4~> o)BX-[Y'EoMN!R_KSbS;;`$L0JVm^``RC,s]$cS^)T_+@e]?%:L6du[3@o$3R$!Q0T:MIARuGtu o)F4~> o)Adj[Y+R9'TA';EdE+S5oq44EdV$@`RC,s]$cS^)T_+@e]?(7H&@;"+W+L@NIlk\T:MI@RZ,kt o)F4~> o)BX-iQ]]ocHstfR^B`HE%sCd`k.o,`RC,s]$cS^'ZfJ:e]?"mp[lA/[^!L#q"!tSs8N#rn,In~> o)BX-iQ]]ocHjnePd%^7BI5B>_RZ6#`RC,s]$cS^'ZfJ:e]?"mp@Q/(Z`Udmp[RbPs8N#rn,In~> o)BX-iQ]ZmcHjndKVl#d>7hZK] o)Adjs8W)ts8N`2hX0j_XdGWYddc]"`RC,s]$cS^%`[W0e]?"ps8M'JnaYi5k5Tr~> o)Adjs8W)ts8N`2h!FRZWKW^Hcgg>s`RC,s]$cS^%`[W0e]?"ps8M$HnaYf3k5Tr~> o)Adjs8W)ts8N`2f'2\JTo"`"b4"Ti`RC,s]$cS^%`[W0e]?"ps8LpDn*fB,k5Tr~> k5PqGhWX1\f`2!M`RC,s]$cS^#0uK4eAfbnhZ&*~> k5PqEh!!tYf)PdK`RC,s]$cS^#0uK4eAfbnhZ&*~> k5Pq@g#_>LdJs7F`RC,s]$cS^#0uK4eAfbnhZ&*~> hZ!fXd+4M,]$cS^#1W,AfZD@thZ&*~> hZ!fXd+4M,]$cS^#1W,AfZD@thZ&*~> hZ!fXd+4M,]$cS^#1W,AfZD@thZ&*~> hZ!fYo&lHb]$cV_#KY9Zs68.Zinj]c~> hZ!fYo&lHb]$cV_#KY9Zs68.Zinj]c~> hZ!fYo&lHb]$cV_#KY9Zs68.Zinj]c~> 
h>[ZNpRlW>dZDZZbK8''rr;66b2V:XJ,~> h>[ZNpRlW>dZDZZbK8''rr;66b2V:XJ,~> h>[ZNpRlW>dZDZZbK8''rr;66b2V:XJ,~> h#@NTS8PpdON[dJ_#=94"9%9"inj]c~> h#@NTS8PpdON[dJ_#=94"9%9"inj]c~> h#@NTS8PpdON[dJ_#=94"9%9"inj]c~> g]%AYLr7W%bQ6&frVluun(H[Vs*t~> g]%AYLr7W%bQ6&frVluun(H[Vs*t~> g]%AYLr7W%bQ6&frVluun(H[Vs*t~> g]%BGf#,drbQ>o&p&4mnrTE;3qVM7&~> g]%BGf#,drbQ>o&p&4mnrTE;3qVM7&~> g]%BGf#,drbQ>o&p&4mnrTE;3qVM7&~> gA_6Rh8tl)!liO_rVluuhWX[gs*t~> gA_6Rh8tl)!liO_rVluuhWX[gs*t~> gA_6Rh8tl)!liO_rVluuhWX[gs*t~> gA_9Spu0>[bQ>`4p&4mnrT`YArS7F'~> gA_9Spu0>[bQ>`4p&4mnrT`YArS7F'~> gA_9Spu0>[bQ>`4p&4mnrT`YArS7F'~> g&D6So]"' g&D6So]"' g&D6So]"' g&D6TrV5 g&D6TrV5 g&D6TrV5 fDbsMnF6:Dq#^?hrr)j"p>b<0rRq4$~> fDbsMnF6:Dq#^?hrr)j"p>b<0rRq4$~> fDbsMnF6:Dq#^?hrr)j"p>b<0rRq4$~> fDc'Rn`0Vki8^?e!9F+A"7"h fDc'Rn`0Vki8^?e!9F+A"7"h fDc'Rn`0Vki8^?e!9F+A"7"h f)GpPa5rr(_u7RUa2n.ts2Y#V!Q`=Z`W=-%qo/ZXa8 f)GpPa5rr(_u7RUa2n.ts2Y#V!Q`=Z`W=-%qo8ZWqo/ZXa8EpXa3"4us2b)W!Q`=Y`W=3%r5JcX aSWsXaN+1t$HU6,`QHKPdaI7Kf)L7~> f)GpPa5rr(_u7RUaN=>!!Q`=Z`WF3&a8EpYa2uC"`rF-W`W=0$r5JfYaiF>!s2b)W!Q`=Z`WF3& a8EpYa2uC"`W=3&r5K,baiDE?b0J>]dcL)Os*t~> ec-$GVZ4`klg!d$nFPuEl3$A@n*Br;!pfLHrTXLNme,oEl3$ACna$/="7,OEli-/Olg4<2rTXLN naYrDl3$ABmd'i:!pfOIrTXLNn+5lDl3$ADn*Br;$L@ ec-$GVZ4`klg!d$nau2Hl3$DBnEg,="75^Lli-/PlgOQ6li-/Olgs]6rTXLOoBklCl3$A>p?hhD "75RKm/?2Om.0W2rosUOm.ToHl3$AAo^)PA"75[Lm/?2Vm.BZ1m-X0/pA4a ec-$GVZ4`klg!d$o(DDKl3$DCna-8?"RGIKnE^)="RPXPmHac:"75[Om/?2OmI]i4rosUOme?/J l3$GEoBlMA"75XKm/H8Plg=H9rTXLOnb);Jl3$DGna-5>$LRNRlg4$*nFl_Wec1.~> e,KI:qu-NsrT2c*rVlupkNM^?rri>ng$8P8rri2aeF`b>rr`,RiqNEO"81O?r;Q]uqsi/,rVlun j5BV/rr`#Slh^V\"9%i?m/?qeq!$N)rr2p"ptFs,rVlulg[tHis*t~> e,KI:qu-Nsr8cN&rVluojlcFnfBN85rri2`de*Pr;Q]uqs`#)rVlun iSO8+rr`#RlMCM["9%i=m/?qepupB&rr2p"pt=j*rVlulg%>6gs*t~> e,KI:qu-Nsr8H/srVluoioKk5rri>ldcUN-rri/\c1:i4rr`)Kh=ggI"7t46r;Q]uqsM\urVlum hV.W#rr_uMkP>,W"9%f7l2CVbpZ9lprr2p"pXJ9urVlujeai[as*t~> c2Rn=QYY.Mrr_8OUu_OT"T-VXWqZMm"RN9jde`tC"6t&gi;NZXepPI/rr3)i`_b"[rr_&9J_9r$ "5\g8m/I"epsDpFrVm#]V/"HWrr3)[Cnftgrr^_fdeD&eJ,~> 
c2Rn=P%<)@rr_5IT]?%O"T-SPVt^2j"RE*]dJEkB"6j`Zhu3QWeTSk%rr3)h`(/,Orr_#3He85r "5\U/li-ndps2I8rVm#]TjuLKrr3)ZAY.r\rr^\^dJ(rdJ,~> c2Rn;Ki-.orr_)9Pi2Q@"T$;5SG)s^"R2U c2Rn?Rr-sZrr_GXWTjE^"T6tcYke:u"Ri]sgACmL"7:>pk5G;^h1*ZBrr3)kbu!!irr_8CK\ub. "6>?Gn,E=hq:JNTrVm#bWb^>err3)`DPu^srr_"pgA&tnJ,~> c2Rn?Q=enMrr_DSVIbt&' "65*>n,E=hq:A-GrVm#aVI\BYrr3)`B;=\hrr^thg%`kmJ,~> c2Rn>MH&7,rr_;ES*'_M"T-_BU\Oig"RW-Geb]:F"7'BDir/lZfkA=rrr3)j`A,a cMqA;bF)N#_"7TMi1d=/d/WURn5$L6li6A%]Jo?)s8UKuBo:lfs80UuEnnmss5;8\1#g"Gs4udp HKs,^s2!`I]%,=1jiHjE`5(jAkhWCCnBV(6c/2a*h9Z,k_!bV!aQh+OJ,~> cMqA9aHKEd]^u0FhOLLuc2[7KmmOG"lMp5![jpjhs8UEq?\71Os80IpB\LSbs5)&V,iH?4s4cUf ETksQs1[H<[aNV(jMgL3^qB.9kM)q/n&tb2b2#migs#ff]^A_e`p(hLJ,~> cMqA6_1k#9[IO:7g6%rXa8bP cMqA?\6M^ObkD(fO^;I]f`1Q]E_0cRn,MmRA4(CXs8UfH:frb(s8:)e;Jii's5pG#6?gZOs5Si9 <.<^(s36F%A>S)]lFh1(B$BiGm$;'QEmapeeri^7Ph?$-b]?L!]C+#FJ,~> cMqA>Zr9;1an>\`McEl@fDkEXC-Pn4mf2dM>WHQ;s8U`A7Seibs89u[87f'es5g4k2fHh9s5JQ) 8pB+ks3$'h>G0aNl+1Rf?HMd cMqA cMqAD[6R>IgA(^,D*L1,j8\hr1J9jEoDeHH4&7ZTs8V/<84.(Hs8CY<7Q.L cMqADZ8Y'+g%bU*Ai2D`irA_o.7,f%oDeHC0h*V5s8V/64um$+s8CV24Y3]$s6Q:-7lf9$s6 cMqABW?3T0ebB+"<$!#dhuEAf&1ea,o)J<4)))W=s8V&%-R)=s6>a\0d>0Gs6)Th ,V%mTs40Xa)1^cIm^3T\(tJi5STeJ'GDZLQ9@J,~> cMqADZ8tN4gA(^,AN2Sdj8\hq-qZ>/oDeHC1J]F@s8V/66p4r6s8CY26S,A,s6Z@.:c[8.s6EKE 5sXZ6s4^X?1QCPsn@^8I1"u@$no-KX.,"Y#ib(b?D<(Zjg+,ZA](=8JJ,~> cMqADY:r-gg%bU*?7mdBirA_n*C)*boDeH>-q55ts8V/14#:!os8CV'3?bBgs6Q-s7k`Qos6<*1 2`Bk!s4U:,.>[*cn@K]2-e\4nnS'O=*S:>kiF"i#B&`jafcWEu\+7lFJ,~> cMqABV]$rtebB+":)OgMhuEAe#V-Upo)J<1'/0^.s8V&!,p#V0s8CIe,7L:2s6>XR0cS[@s6)?^ +X?(Js40FX'7Aj?m^*6S&_6RSn66YW#h/hRhGG[;=5X&OeI3f2Yjp'>J,~> cMqA>\RJ9can>\`Qt0m#fDkEZIS+Chmf2dUD*i]ls8U`K;I,U5s89um]4s3$R/CoQ1hl+ML1DoqSMm%%f`IaJ,peX9-ES(@W2aa['7]^=&FJ,~> cMqA=[95kEa7]J\P?_I]ec53UG<]TKmJlXOAN+hOs8U]E85kVns89od95q5ts5^8$21!=?s5Af6 :4h:$s2g6tA>IoYkdkppB?0QBl^)!GGL$6ge!!.*QIPs*a*:!r\a@`CJ,~> cMqA;Y"10c_=[cPMG-j,d/WUGCai>ili6@C=!)7ms8UN81dZu:s80WS3+T)Js5Bbb*H/dks4r2l 4*]EXs2'CP/B_.l&S[nCru_WcAO;NNR7^q_/:oBZKoj:J,~> 
cMqAR6kKE6K_o2*FlJ8RFn'V18d,80/hU;Gp`9q"$bj<[UJ,~> cMqA;bEGcg_=[cPhOLM!d/WUSmRFJ&li6A%[k-sls8UNu@"[CVs80XtC>$hhs5D>Z-JuZ:s4uah Ep20Us23iB[FEb,k/cj8^VTF>l.`.3ma(q5cJDHpgsH)k_ cMqA8`J-M>](5mAgQA&ZbQ%%FlnGBTl2U(qY8$?Es8U?l:3G@+s80Cj=OkFHs4tlM%G\Pks4ZFT @cc)=s1I3(XjGGpj21'j\@V,.k1H7dm)f;-aP&hHfuj?`]'DZK_s#GHJ,~> c2Rn?SSm?`rr_J[X6T]a"T7(fZMFM""Rrj!h>@3O"7LMtkPbD_i.'&Grr3)lcVW9mrr_>FL#N"1 "6PNLnG`Fiq:e`YrVm#dXDH_krr3)bDlN%#rr_(sh>,@rJ,~> c2Rn?QtG1Qrr_JVVWn*["T7"\Y5/(s"Rr]jg\_!M"7C2gk5G;^hKd?=rr3)lbt$@`rr_;@J)L;* "6G6An,E=hq:S9KrVm#cW+F`^rr3)aB;Okkrr_%kg\K.pJ,~> c2Rn>N)\O0rr_AHSEKnO"T6nFV>1&i"R`9Jf_YUI"70KGjSf)\gM+\#rr3)ka>)-Arr_51E8CKm "6+O'mf*4gq:7:*rVm#aSl45>rr3)_ c2Rn"6b,ohYmHVdt5I.rr3)h`*1C`rr^u;L=Q8& "5Jg:lMgecpWZmJrVm#[VK'o[rr3)YFJRmorr^VkcM#Q`J,~> c2RnRVY:#h"R2pdbPD/;"6Xfag\q-Sd!NLtrr3)f^f&;Qrr^l2JC=Dr "58I.l2L\bpW?C;rVm#YTk_dLrr3)WCnBP_rr^M`bP'6]J,~> c2Rn:L0;Urrr^r6Phc9<"T$)7S+Zd\"Ql=A`VBH4"63p=fDY^Ob%eBSrr3)d\3.Y,rr^\sE6S:\ "4_Rdk5PA_p;9"hrVm#UPuks&rr3)S>*@Y=rr^8G`UqIUJ,~> c2Rn4O+")-rr^9(PKEh("S\jAQLOnR"PT8cVY'lf"4`!\_YsK:Y_8\5rr3)ZU2m=5rr]lkL9gdX "2B\Zf`(mQnY^DrrVm#DQ$fJ+rr3)@IZqkOrr]6MVXMh1J,~> c2Rn2M0>cprr^,rNQ1tt"S\X3ORN2K"P8iRT_/6`"4MUL^A\'6WdU?!rr3)XS8+o!rr]`aJ$&\L "1s/If)G[On=aT\rVm#AO*.,mrr3)rr]'@T^L,*J,~> c2Rn/G@R$=rr]cWI(fOZ"SIsdJ*m78"OVg'P4JVP"3Y@u[/L",S8+#Crr3)SNE2GFrr] c2RnEW,MS3rr`,%]`%j0"TJ1,`;]c9"8p58qu-Nsr-Y%4rVluqK^&L8rr`5_8b2i["8\LHr;HWt qNK&"rr3*"p.<[1rr`/1D#=22"8ZrIrVc`upi?92cMrC~> c2RnEW,DM2rr`,%]D_a/"TJ1,`;]c9"8p26qu-Nsr-Oq2rVluqKBW=6rr`5_8Fl`Z"8\LGr;HWt qNAqurr3*"p.3U0rr`/1D#=22"8ZoGrVc`upi631cMrC~> c2RnFV.ff*rr`/$\c)O-"TJ4'_uBZ8"8p//r;HWtr-+P,rVlurJ*Ht3rr`5_61Y!S"8\C@r;HWt qiAesrr3*"pHd1)rr`//B)MW-"8cfArVc`uq/$!.cMrC~> bl7^;?Msg)ri bl7^;>l=U'ri3C(rrW0s^\n*4`Ad\o!H!qrrrW1Cl2:P_l7qr0rrW1_J,K bl7^:>PnF$WO;[(!rc$2r;Qf:2>dCSCQJDn!rd?,r;Qi`0D,5C!re=2r;QitL;a#i!rO^sr;Qf+ =Sr+!>dF;-!WHKNs*t~> c2RnFSlu$jrr`4tZi9q'!rPg,rVm!!___8irr`77NW/nV"8m6'rr2p"robs)rVluuO*pp)rr`4Q _Z'N6!r=LmrVluuZr[/(rr`4'Yl=V$!rQ@&c2W:~> 
c2RnFSlu$jrr`4tZi9q'!rPg,rVm!!___8irr`77NW/nV"8m6'rr2p"robs)rVluuO*pp)rr`4Q _Z'N6!r=LmrVluuZr[/(rr`4'Yl=V$!rQ@&c2W:~> c2RnFSQPjhrr`4tZi9q'!rPd+rVm!!__V2hrr`77NW/nV"8m3&rr2p"robp(rVluuNdLa'rr`4Q _Z'N6!r=IlrVluuZW@&'rr`4&Yl=V$!rQ@&c2W:~> c2RnES6Ysirr`(nZi0n'"TJ*k^&J$2"8fknqu-NsqeM8jrVluoFQWT%rr`2Z1%G5B"8In*r;HWt pk6N_rr3*"oJ+Sgrr`,&>5S9u"8Q3,rVc`up0ddscMrC~> c2RnERp5dgrr`(nZi0n'"TJ*j^&J$2"8fhkqu-Nsqe;)grVluoEp!B#rr`2Z0_,,A"8Ik(r;HWt pk-H^rr3*"oJ"Mfrr`,&=Sr's"8Q0*rVc`up0RXqcMrC~> c2RnERp#Xerr`+nZMje&"TJ-i]`.p1"8fhiqu-Nsqe1uerVluoEp!B#rr`2Z0(Jo?"8Rn'r;HWt q1?H]rr3*"oIe>crr`,&=8Vsr"8Z3*rVc`upKd[qcMrC~> c2RnESQu'jrr`(oZi0n'"TJ*l^&J$2"8fknqu-NsqeM8jrVluoFQWT%rr`2Z1@b>C"8In*r;HWt q1Z]arr3*"oJ4Yhrr`,'>5S9u"8Q6-rVc`up0ddscMrC~> c2RnES6Pmhrr`(nZi0n'"TJ*j^&J$2"8fklqu-NsqeD/hrVluoF6 c2RnERp#Xerr`+nZMje&"TJ-j]`.p1"8fhiqu-Nsqe1uerVluoEp!B#rr`2Z0(Ai>"8Rq(r;HWt q1HN^rr3*"oIe>crr`,&=8Vsr"8Z3*rVc`upKd[qcMrC~> c2RnES6Psjrr`(nZi0n'"9/!i^A\'6qnG&erVlusBoi5krr`"9kPkJ\"8q15o)8Rjpl0_#rVlup K>RWerr`8m0[9Rr"8]9)r;HWtq-1-&rVlrm?Mr(JJ,~> c2RnERp,dhrr`(mZi0n'"9/!h^&@s5qnG#drVlusBo`/jrr`"9kPkJ\"8q14o)8Rjpl0_#rVlup K>RWerr`8m0?jCp"8]6(r;HWtq-('%rVlrm?Mr(JJ,~> c2RnERT]Ufrr`(mZi0n'"9/!g^&@s5qnFrbrVlusBT;uhrr`"8kPkJ\"8q12o)8Rjpl'V!rVlup K#7Ndrr`8m/^41n"8]6&r;HWtq,ss#rVlrm>l;kHJ,~> c2RkEO@/sG!rSOfr;Qiq2R`Sa!r]\Ir;Qis c2RkEO@/sG!rSOfr;Qiq2R`Sa!r]\Ir;Qis c2RkEO@/sG!rSOfr;Qiq27EJ`!r]\Ir;Qis bl7a\Q2^a^"9-?err)iurf5P-rr`8RIK'3F"9+t&rr)iurhf2`rr`8jH2%:;"9,g3rr)iurit,R rrW+Dhu3QWrm-i.rVm!!QHfF-rrW1erltIn~> bl7a\Q2^a^"9-?err)iurf5P-rr`8RIK'3F"9+t&rr)iurhf2`rr`8jH2%:;"9,g3rr)iurit,R rrW+Dhu3QWrm-i.rVm!!QHfF-rrW1erltIn~> bl7a\Q2^a^"9-?err)iurf5P-rr`8RIK'3F"9+t&rr)iurhf2`rr`8jH2%:;"9,g3rr)iurit,R rrW+Dhu3QWrm-i.rVm!!QHfF-rrW1erltIn~> bl7bDs8MusrVlcq!r`/urVuos!WW,srr`9#rr2lrrr2lrs8E'!rr)lqrr)lqrr)iurVulqs8W'! s8Dor"9/?"rr)iurVui@s*t~> bl7bDs8MusrVlcq!r`/urVuos!WW,srr`9#rr2lrrr2lrs8E'!rr)lqrr)lqrr)iurVulqs8W'! s8Dor"9/?"rr)iurVui@s*t~> bl7bDs8MusrVlcq!r`/urVuos!WW,srr`9#rr2lrrr2lrs8E'!rr)lqrr)lqrr)iurVulqs8W'! 
s8Dor"9/?"rr)iurVui@s*t~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> JcE:\J,~> %%EndData showpage %%Trailer end %%EOF ViennaCL-1.5.1-src/doc/manual/figures/solver.eps000644 001750 001750 00000046305 12267307531 021507 0ustar00rupprupp000000 000000 %!PS-Adobe-2.0 EPSF-2.0 %%Title: solver.eps %%Creator: gnuplot 4.2 patchlevel 5 %%CreationDate: Thu Apr 8 13:18:45 2010 %%DocumentFonts: (atend) %%BoundingBox: 50 120 410 232 %%EndComments %%BeginProlog /gnudict 256 dict def gnudict begin % % The following 6 true/false flags may be edited by hand if required % The unit line width may also be changed % /Color true def /Blacktext false def /Solid false def /Dashlength 1 def /Landscape false def /Level1 false def /Rounded false def /TransparentPatterns false def /gnulinewidth 5.000 def /userlinewidth gnulinewidth def % /vshift -46 def /dl1 { 10.0 Dashlength mul mul Rounded { currentlinewidth 0.75 mul sub dup 0 le { pop 0.01 } if } if } def /dl2 { 10.0 Dashlength mul mul Rounded { currentlinewidth 0.75 mul add } if } def /hpt_ 31.5 def /vpt_ 31.5 def /hpt hpt_ def /vpt vpt_ def Level1 {} { /SDict 10 dict def systemdict /pdfmark known not { userdict /pdfmark systemdict /cleartomark get put } if SDict begin [ /Title (solver.eps) /Subject (gnuplot plot) /Creator (gnuplot 4.2 patchlevel 5 ) /Author (Karl Rupp) % /Producer (gnuplot) % /Keywords () /CreationDate (Thu Apr 8 13:18:45 2010) /DOCINFO pdfmark end } ifelse % % Gnuplot Prolog Version 4.2 (August 2006) % /M {moveto} bind def /L {lineto} bind def /R {rmoveto} bind def /V {rlineto} bind def /N {newpath moveto} bind def /Z {closepath} 
bind def /C {setrgbcolor} bind def /f {rlineto fill} bind def /vpt2 vpt 2 mul def /hpt2 hpt 2 mul def /Lshow {currentpoint stroke M 0 vshift R Blacktext {gsave 0 setgray show grestore} {show} ifelse} def /Rshow {currentpoint stroke M dup stringwidth pop neg vshift R Blacktext {gsave 0 setgray show grestore} {show} ifelse} def /Cshow {currentpoint stroke M dup stringwidth pop -2 div vshift R Blacktext {gsave 0 setgray show grestore} {show} ifelse} def /UP {dup vpt_ mul /vpt exch def hpt_ mul /hpt exch def /hpt2 hpt 2 mul def /vpt2 vpt 2 mul def} def /DL {Color {setrgbcolor Solid {pop []} if 0 setdash} {pop pop pop 0 setgray Solid {pop []} if 0 setdash} ifelse} def /BL {stroke userlinewidth 2 mul setlinewidth Rounded {1 setlinejoin 1 setlinecap} if} def /AL {stroke userlinewidth 2 div setlinewidth Rounded {1 setlinejoin 1 setlinecap} if} def /UL {dup gnulinewidth mul /userlinewidth exch def dup 1 lt {pop 1} if 10 mul /udl exch def} def /PL {stroke userlinewidth setlinewidth Rounded {1 setlinejoin 1 setlinecap} if} def % Default Line colors /LCw {1 1 1} def /LCb {0 0 0} def /LCa {0 0 0} def /LC0 {1 0 0} def /LC1 {0 1 0} def /LC2 {0 0 1} def /LC3 {1 0 1} def /LC4 {0 1 1} def /LC5 {1 1 0} def /LC6 {0 0 0} def /LC7 {1 0.3 0} def /LC8 {0.5 0.5 0.5} def % Default Line Types /LTw {PL [] 1 setgray} def /LTb {BL [] LCb DL} def /LTa {AL [1 udl mul 2 udl mul] 0 setdash LCa setrgbcolor} def /LT0 {PL [] LC0 DL} def /LT1 {PL [4 dl1 2 dl2] LC1 DL} def /LT2 {PL [2 dl1 3 dl2] LC2 DL} def /LT3 {PL [1 dl1 1.5 dl2] LC3 DL} def /LT4 {PL [6 dl1 2 dl2 1 dl1 2 dl2] LC4 DL} def /LT5 {PL [3 dl1 3 dl2 1 dl1 3 dl2] LC5 DL} def /LT6 {PL [2 dl1 2 dl2 2 dl1 6 dl2] LC6 DL} def /LT7 {PL [1 dl1 2 dl2 6 dl1 2 dl2 1 dl1 2 dl2] LC7 DL} def /LT8 {PL [2 dl1 2 dl2 2 dl1 2 dl2 2 dl1 2 dl2 2 dl1 4 dl2] LC8 DL} def /Pnt {stroke [] 0 setdash gsave 1 setlinecap M 0 0 V stroke grestore} def /Dia {stroke [] 0 setdash 2 copy vpt add M hpt neg vpt neg V hpt vpt neg V hpt vpt V hpt neg vpt V closepath stroke Pnt} 
def /Pls {stroke [] 0 setdash vpt sub M 0 vpt2 V currentpoint stroke M hpt neg vpt neg R hpt2 0 V stroke } def /Box {stroke [] 0 setdash 2 copy exch hpt sub exch vpt add M 0 vpt2 neg V hpt2 0 V 0 vpt2 V hpt2 neg 0 V closepath stroke Pnt} def /Crs {stroke [] 0 setdash exch hpt sub exch vpt add M hpt2 vpt2 neg V currentpoint stroke M hpt2 neg 0 R hpt2 vpt2 V stroke} def /TriU {stroke [] 0 setdash 2 copy vpt 1.12 mul add M hpt neg vpt -1.62 mul V hpt 2 mul 0 V hpt neg vpt 1.62 mul V closepath stroke Pnt} def /Star {2 copy Pls Crs} def /BoxF {stroke [] 0 setdash exch hpt sub exch vpt add M 0 vpt2 neg V hpt2 0 V 0 vpt2 V hpt2 neg 0 V closepath fill} def /TriUF {stroke [] 0 setdash vpt 1.12 mul add M hpt neg vpt -1.62 mul V hpt 2 mul 0 V hpt neg vpt 1.62 mul V closepath fill} def /TriD {stroke [] 0 setdash 2 copy vpt 1.12 mul sub M hpt neg vpt 1.62 mul V hpt 2 mul 0 V hpt neg vpt -1.62 mul V closepath stroke Pnt} def /TriDF {stroke [] 0 setdash vpt 1.12 mul sub M hpt neg vpt 1.62 mul V hpt 2 mul 0 V hpt neg vpt -1.62 mul V closepath fill} def /DiaF {stroke [] 0 setdash vpt add M hpt neg vpt neg V hpt vpt neg V hpt vpt V hpt neg vpt V closepath fill} def /Pent {stroke [] 0 setdash 2 copy gsave translate 0 hpt M 4 {72 rotate 0 hpt L} repeat closepath stroke grestore Pnt} def /PentF {stroke [] 0 setdash gsave translate 0 hpt M 4 {72 rotate 0 hpt L} repeat closepath fill grestore} def /Circle {stroke [] 0 setdash 2 copy hpt 0 360 arc stroke Pnt} def /CircleF {stroke [] 0 setdash hpt 0 360 arc fill} def /C0 {BL [] 0 setdash 2 copy moveto vpt 90 450 arc} bind def /C1 {BL [] 0 setdash 2 copy moveto 2 copy vpt 0 90 arc closepath fill vpt 0 360 arc closepath} bind def /C2 {BL [] 0 setdash 2 copy moveto 2 copy vpt 90 180 arc closepath fill vpt 0 360 arc closepath} bind def /C3 {BL [] 0 setdash 2 copy moveto 2 copy vpt 0 180 arc closepath fill vpt 0 360 arc closepath} bind def /C4 {BL [] 0 setdash 2 copy moveto 2 copy vpt 180 270 arc closepath fill vpt 0 360 arc closepath} bind def 
/C5 {BL [] 0 setdash 2 copy moveto 2 copy vpt 0 90 arc 2 copy moveto 2 copy vpt 180 270 arc closepath fill vpt 0 360 arc} bind def /C6 {BL [] 0 setdash 2 copy moveto 2 copy vpt 90 270 arc closepath fill vpt 0 360 arc closepath} bind def /C7 {BL [] 0 setdash 2 copy moveto 2 copy vpt 0 270 arc closepath fill vpt 0 360 arc closepath} bind def /C8 {BL [] 0 setdash 2 copy moveto 2 copy vpt 270 360 arc closepath fill vpt 0 360 arc closepath} bind def /C9 {BL [] 0 setdash 2 copy moveto 2 copy vpt 270 450 arc closepath fill vpt 0 360 arc closepath} bind def /C10 {BL [] 0 setdash 2 copy 2 copy moveto vpt 270 360 arc closepath fill 2 copy moveto 2 copy vpt 90 180 arc closepath fill vpt 0 360 arc closepath} bind def /C11 {BL [] 0 setdash 2 copy moveto 2 copy vpt 0 180 arc closepath fill 2 copy moveto 2 copy vpt 270 360 arc closepath fill vpt 0 360 arc closepath} bind def /C12 {BL [] 0 setdash 2 copy moveto 2 copy vpt 180 360 arc closepath fill vpt 0 360 arc closepath} bind def /C13 {BL [] 0 setdash 2 copy moveto 2 copy vpt 0 90 arc closepath fill 2 copy moveto 2 copy vpt 180 360 arc closepath fill vpt 0 360 arc closepath} bind def /C14 {BL [] 0 setdash 2 copy moveto 2 copy vpt 90 360 arc closepath fill vpt 0 360 arc} bind def /C15 {BL [] 0 setdash 2 copy vpt 0 360 arc closepath fill vpt 0 360 arc closepath} bind def /Rec {newpath 4 2 roll moveto 1 index 0 rlineto 0 exch rlineto neg 0 rlineto closepath} bind def /Square {dup Rec} bind def /Bsquare {vpt sub exch vpt sub exch vpt2 Square} bind def /S0 {BL [] 0 setdash 2 copy moveto 0 vpt rlineto BL Bsquare} bind def /S1 {BL [] 0 setdash 2 copy vpt Square fill Bsquare} bind def /S2 {BL [] 0 setdash 2 copy exch vpt sub exch vpt Square fill Bsquare} bind def /S3 {BL [] 0 setdash 2 copy exch vpt sub exch vpt2 vpt Rec fill Bsquare} bind def /S4 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt Square fill Bsquare} bind def /S5 {BL [] 0 setdash 2 copy 2 copy vpt Square fill exch vpt sub exch vpt sub vpt Square fill Bsquare} bind 
def /S6 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt vpt2 Rec fill Bsquare} bind def /S7 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt vpt2 Rec fill 2 copy vpt Square fill Bsquare} bind def /S8 {BL [] 0 setdash 2 copy vpt sub vpt Square fill Bsquare} bind def /S9 {BL [] 0 setdash 2 copy vpt sub vpt vpt2 Rec fill Bsquare} bind def /S10 {BL [] 0 setdash 2 copy vpt sub vpt Square fill 2 copy exch vpt sub exch vpt Square fill Bsquare} bind def /S11 {BL [] 0 setdash 2 copy vpt sub vpt Square fill 2 copy exch vpt sub exch vpt2 vpt Rec fill Bsquare} bind def /S12 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill Bsquare} bind def /S13 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill 2 copy vpt Square fill Bsquare} bind def /S14 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill 2 copy exch vpt sub exch vpt Square fill Bsquare} bind def /S15 {BL [] 0 setdash 2 copy Bsquare fill Bsquare} bind def /D0 {gsave translate 45 rotate 0 0 S0 stroke grestore} bind def /D1 {gsave translate 45 rotate 0 0 S1 stroke grestore} bind def /D2 {gsave translate 45 rotate 0 0 S2 stroke grestore} bind def /D3 {gsave translate 45 rotate 0 0 S3 stroke grestore} bind def /D4 {gsave translate 45 rotate 0 0 S4 stroke grestore} bind def /D5 {gsave translate 45 rotate 0 0 S5 stroke grestore} bind def /D6 {gsave translate 45 rotate 0 0 S6 stroke grestore} bind def /D7 {gsave translate 45 rotate 0 0 S7 stroke grestore} bind def /D8 {gsave translate 45 rotate 0 0 S8 stroke grestore} bind def /D9 {gsave translate 45 rotate 0 0 S9 stroke grestore} bind def /D10 {gsave translate 45 rotate 0 0 S10 stroke grestore} bind def /D11 {gsave translate 45 rotate 0 0 S11 stroke grestore} bind def /D12 {gsave translate 45 rotate 0 0 S12 stroke grestore} bind def /D13 {gsave translate 45 rotate 0 0 S13 stroke grestore} bind def /D14 {gsave translate 45 rotate 0 0 S14 stroke grestore} bind def /D15 {gsave translate 45 rotate 0 0 S15 stroke grestore} 
bind def /DiaE {stroke [] 0 setdash vpt add M hpt neg vpt neg V hpt vpt neg V hpt vpt V hpt neg vpt V closepath stroke} def /BoxE {stroke [] 0 setdash exch hpt sub exch vpt add M 0 vpt2 neg V hpt2 0 V 0 vpt2 V hpt2 neg 0 V closepath stroke} def /TriUE {stroke [] 0 setdash vpt 1.12 mul add M hpt neg vpt -1.62 mul V hpt 2 mul 0 V hpt neg vpt 1.62 mul V closepath stroke} def /TriDE {stroke [] 0 setdash vpt 1.12 mul sub M hpt neg vpt 1.62 mul V hpt 2 mul 0 V hpt neg vpt -1.62 mul V closepath stroke} def /PentE {stroke [] 0 setdash gsave translate 0 hpt M 4 {72 rotate 0 hpt L} repeat closepath stroke grestore} def /CircE {stroke [] 0 setdash hpt 0 360 arc stroke} def /Opaque {gsave closepath 1 setgray fill grestore 0 setgray closepath} def /DiaW {stroke [] 0 setdash vpt add M hpt neg vpt neg V hpt vpt neg V hpt vpt V hpt neg vpt V Opaque stroke} def /BoxW {stroke [] 0 setdash exch hpt sub exch vpt add M 0 vpt2 neg V hpt2 0 V 0 vpt2 V hpt2 neg 0 V Opaque stroke} def /TriUW {stroke [] 0 setdash vpt 1.12 mul add M hpt neg vpt -1.62 mul V hpt 2 mul 0 V hpt neg vpt 1.62 mul V Opaque stroke} def /TriDW {stroke [] 0 setdash vpt 1.12 mul sub M hpt neg vpt 1.62 mul V hpt 2 mul 0 V hpt neg vpt -1.62 mul V Opaque stroke} def /PentW {stroke [] 0 setdash gsave translate 0 hpt M 4 {72 rotate 0 hpt L} repeat Opaque stroke grestore} def /CircW {stroke [] 0 setdash hpt 0 360 arc Opaque stroke} def /BoxFill {gsave Rec 1 setgray fill grestore} def /Density { /Fillden exch def currentrgbcolor /ColB exch def /ColG exch def /ColR exch def /ColR ColR Fillden mul Fillden sub 1 add def /ColG ColG Fillden mul Fillden sub 1 add def /ColB ColB Fillden mul Fillden sub 1 add def ColR ColG ColB setrgbcolor} def /BoxColFill {gsave Rec PolyFill} def /PolyFill {gsave Density fill grestore grestore} def /h {rlineto rlineto rlineto gsave closepath fill grestore} bind def % % PostScript Level 1 Pattern Fill routine for rectangles % Usage: x y w h s a XX PatternFill % x,y = lower left corner of box to be 
filled % w,h = width and height of box % a = angle in degrees between lines and x-axis % XX = 0/1 for no/yes cross-hatch % /PatternFill {gsave /PFa [ 9 2 roll ] def PFa 0 get PFa 2 get 2 div add PFa 1 get PFa 3 get 2 div add translate PFa 2 get -2 div PFa 3 get -2 div PFa 2 get PFa 3 get Rec gsave 1 setgray fill grestore clip currentlinewidth 0.5 mul setlinewidth /PFs PFa 2 get dup mul PFa 3 get dup mul add sqrt def 0 0 M PFa 5 get rotate PFs -2 div dup translate 0 1 PFs PFa 4 get div 1 add floor cvi {PFa 4 get mul 0 M 0 PFs V} for 0 PFa 6 get ne { 0 1 PFs PFa 4 get div 1 add floor cvi {PFa 4 get mul 0 2 1 roll M PFs 0 V} for } if stroke grestore} def % /languagelevel where {pop languagelevel} {1} ifelse 2 lt {/InterpretLevel1 true def} {/InterpretLevel1 Level1 def} ifelse % % PostScript level 2 pattern fill definitions % /Level2PatternFill { /Tile8x8 {/PaintType 2 /PatternType 1 /TilingType 1 /BBox [0 0 8 8] /XStep 8 /YStep 8} bind def /KeepColor {currentrgbcolor [/Pattern /DeviceRGB] setcolorspace} bind def << Tile8x8 /PaintProc {0.5 setlinewidth pop 0 0 M 8 8 L 0 8 M 8 0 L stroke} >> matrix makepattern /Pat1 exch def << Tile8x8 /PaintProc {0.5 setlinewidth pop 0 0 M 8 8 L 0 8 M 8 0 L stroke 0 4 M 4 8 L 8 4 L 4 0 L 0 4 L stroke} >> matrix makepattern /Pat2 exch def << Tile8x8 /PaintProc {0.5 setlinewidth pop 0 0 M 0 8 L 8 8 L 8 0 L 0 0 L fill} >> matrix makepattern /Pat3 exch def << Tile8x8 /PaintProc {0.5 setlinewidth pop -4 8 M 8 -4 L 0 12 M 12 0 L stroke} >> matrix makepattern /Pat4 exch def << Tile8x8 /PaintProc {0.5 setlinewidth pop -4 0 M 8 12 L 0 -4 M 12 8 L stroke} >> matrix makepattern /Pat5 exch def << Tile8x8 /PaintProc {0.5 setlinewidth pop -2 8 M 4 -4 L 0 12 M 8 -4 L 4 12 M 10 0 L stroke} >> matrix makepattern /Pat6 exch def << Tile8x8 /PaintProc {0.5 setlinewidth pop -2 0 M 4 12 L 0 -4 M 8 12 L 4 -4 M 10 8 L stroke} >> matrix makepattern /Pat7 exch def << Tile8x8 /PaintProc {0.5 setlinewidth pop 8 -2 M -4 4 L 12 0 M -4 8 L 12 4 M 0 10 L stroke} >> 
matrix makepattern /Pat8 exch def << Tile8x8 /PaintProc {0.5 setlinewidth pop 0 -2 M 12 4 L -4 0 M 12 8 L -4 4 M 8 10 L stroke} >> matrix makepattern /Pat9 exch def /Pattern1 {PatternBgnd KeepColor Pat1 setpattern} bind def /Pattern2 {PatternBgnd KeepColor Pat2 setpattern} bind def /Pattern3 {PatternBgnd KeepColor Pat3 setpattern} bind def /Pattern4 {PatternBgnd KeepColor Landscape {Pat5} {Pat4} ifelse setpattern} bind def /Pattern5 {PatternBgnd KeepColor Landscape {Pat4} {Pat5} ifelse setpattern} bind def /Pattern6 {PatternBgnd KeepColor Landscape {Pat9} {Pat6} ifelse setpattern} bind def /Pattern7 {PatternBgnd KeepColor Landscape {Pat8} {Pat7} ifelse setpattern} bind def } def % % %End of PostScript Level 2 code % /PatternBgnd { TransparentPatterns {} {gsave 1 setgray fill grestore} ifelse } def % % Substitute for Level 2 pattern fill codes with % grayscale if Level 2 support is not selected. % /Level1PatternFill { /Pattern1 {0.250 Density} bind def /Pattern2 {0.500 Density} bind def /Pattern3 {0.750 Density} bind def /Pattern4 {0.125 Density} bind def /Pattern5 {0.375 Density} bind def /Pattern6 {0.625 Density} bind def /Pattern7 {0.875 Density} bind def } def % % Now test for support of Level 2 code % Level1 {Level1PatternFill} {Level2PatternFill} ifelse % /Symbol-Oblique /Symbol findfont [1 0 .167 1 0 0] makefont dup length dict begin {1 index /FID eq {pop pop} {def} ifelse} forall currentdict end definefont pop /MFshow { { dup 5 get 3 ge { 5 get 3 eq {gsave} {grestore} ifelse } {dup dup 0 get findfont exch 1 get scalefont setfont [ currentpoint ] exch dup 2 get 0 exch R dup 5 get 2 ne {dup dup 6 get exch 4 get {show} {stringwidth pop 0 R} ifelse }if dup 5 get 0 eq {dup 3 get {2 get neg 0 exch R pop} {pop aload pop M} ifelse} {dup 5 get 1 eq {dup 2 get exch dup 3 get exch 6 get stringwidth pop -2 div dup 0 R} {dup 6 get stringwidth pop -2 div 0 R 6 get show 2 index {aload pop M neg 3 -1 roll neg R pop pop} {pop pop pop pop aload pop M} ifelse }ifelse }ifelse } 
ifelse } forall} bind def /MFwidth {0 exch { dup 5 get 3 ge { 5 get 3 eq { 0 } { pop } ifelse } {dup 3 get{dup dup 0 get findfont exch 1 get scalefont setfont 6 get stringwidth pop add} {pop} ifelse} ifelse} forall} bind def /MLshow { currentpoint stroke M 0 exch R Blacktext {gsave 0 setgray MFshow grestore} {MFshow} ifelse } bind def /MRshow { currentpoint stroke M exch dup MFwidth neg 3 -1 roll R Blacktext {gsave 0 setgray MFshow grestore} {MFshow} ifelse } bind def /MCshow { currentpoint stroke M exch dup MFwidth -2 div 3 -1 roll R Blacktext {gsave 0 setgray MFshow grestore} {MFshow} ifelse } bind def /XYsave { [( ) 1 2 true false 3 ()] } bind def /XYrestore { [( ) 1 2 true false 4 ()] } bind def end %%EndProlog gnudict begin gsave 50 50 translate 0.050 0.050 scale 0 setgray newpath (Helvetica) findfont 140 scalefont setfont 1.000 UL LTb 854 1809 M 63 0 V 6073 0 R -63 0 V stroke 770 1809 M [ [(Helvetica) 140.0 0.0 true true 0 ( 0.1)] ] -46.7 MRshow 1.000 UL LTb 854 1963 M 31 0 V 6105 0 R -31 0 V 854 2053 M 31 0 V 6105 0 R -31 0 V 854 2117 M 31 0 V 6105 0 R -31 0 V 854 2166 M 31 0 V 6105 0 R -31 0 V 854 2207 M 31 0 V 6105 0 R -31 0 V 854 2241 M 31 0 V 6105 0 R -31 0 V 854 2271 M 31 0 V 6105 0 R -31 0 V 854 2297 M 31 0 V 6105 0 R -31 0 V 854 2320 M 63 0 V 6073 0 R -63 0 V stroke 770 2320 M [ [(Helvetica) 140.0 0.0 true true 0 ( 1)] ] -46.7 MRshow 1.000 UL LTb 854 2474 M 31 0 V 6105 0 R -31 0 V 854 2564 M 31 0 V 6105 0 R -31 0 V 854 2628 M 31 0 V 6105 0 R -31 0 V 854 2678 M 31 0 V 6105 0 R -31 0 V 854 2718 M 31 0 V 6105 0 R -31 0 V 854 2752 M 31 0 V 6105 0 R -31 0 V 854 2782 M 31 0 V 6105 0 R -31 0 V 854 2808 M 31 0 V 6105 0 R -31 0 V 854 2832 M 63 0 V 6073 0 R -63 0 V stroke 770 2832 M [ [(Helvetica) 140.0 0.0 true true 0 ( 10)] ] -46.7 MRshow 1.000 UL LTb 854 2986 M 31 0 V 6105 0 R -31 0 V 854 3076 M 31 0 V 6105 0 R -31 0 V 854 3140 M 31 0 V 6105 0 R -31 0 V 854 3189 M 31 0 V 6105 0 R -31 0 V 854 3230 M 31 0 V 6105 0 R -31 0 V 854 3264 M 31 0 V 6105 0 R -31 0 V 
854 3293 M 31 0 V 6105 0 R -31 0 V 854 3320 M 31 0 V 6105 0 R -31 0 V 854 3343 M 63 0 V 6073 0 R -63 0 V stroke 770 3343 M [ [(Helvetica) 140.0 0.0 true true 0 ( 100)] ] -46.7 MRshow 1.000 UL LTb 2388 1809 M 0 63 V 0 1471 R 0 -63 V stroke 2388 1669 M [ [(Helvetica) 140.0 0.0 true true 0 (CG)] ] -46.7 MCshow 1.000 UL LTb 3922 1809 M 0 63 V 0 1471 R 0 -63 V stroke 3922 1669 M [ [(Helvetica) 140.0 0.0 true true 0 (BiCGStab)] ] -46.7 MCshow 1.000 UL LTb 5456 1809 M 0 63 V 0 1471 R 0 -63 V stroke 5456 1669 M [ [(Helvetica) 140.0 0.0 true true 0 (GMRES)] ] -46.7 MCshow 1.000 UL LTb 1.000 UL LTb 854 3343 N 0 -1534 V 6136 0 V 0 1534 V -6136 0 V Z stroke LCb setrgbcolor 280 2576 M currentpoint gsave translate 90 rotate 0 0 moveto [ [(Helvetica) 140.0 0.0 true true 0 (Execution Time \(sec\))] ] -46.7 MCshow grestore LTb 1.000 UP 1.000 UL LTb 1.000 UL LT0 LTb 1190 3210 M [ [(Helvetica) 140.0 0.0 true true 0 (CPU)] ] -46.7 MRshow LT0 1.000 1274 3175 399 70 BoxColFill LTb 1274 3175 N 399 0 V 0 70 V -399 0 V 0 -70 V Z stroke LT0 1.000 1877 1809 512 804 BoxColFill LTb 1877 1809 N 0 803 V 511 0 V 0 -803 V -511 0 V Z stroke LT0 1.000 3411 1809 512 950 BoxColFill LTb 3411 1809 N 0 949 V 511 0 V 0 -949 V -511 0 V Z stroke LT0 1.000 4945 1809 512 1059 BoxColFill LTb 4945 1809 N 0 1058 V 511 0 V 0 -1058 V -511 0 V Z stroke LT0 1.000 UL LT1 LTb 1190 3070 M [ [(Helvetica) 140.0 0.0 true true 0 (GPU)] ] -46.7 MRshow LT1 1.000 1274 3035 399 70 BoxColFill LTb 1274 3035 N 399 0 V 0 70 V -399 0 V 0 -70 V Z stroke LT1 1.000 2388 1809 512 354 BoxColFill LTb 2388 1809 N 0 353 V 511 0 V 0 -353 V -511 0 V Z stroke LT1 1.000 3922 1809 512 506 BoxColFill LTb 3922 1809 N 0 505 V 511 0 V 0 -505 V -511 0 V Z stroke LT1 1.000 5456 1809 512 695 BoxColFill LTb 5456 1809 N 0 694 V 511 0 V 0 -694 V -511 0 V Z stroke LT1 1.000 UL LTb 854 3343 N 0 -1534 V 6136 0 V 0 1534 V -6136 0 V Z stroke 1.000 UP 1.000 UL LTb stroke grestore end showpage %%Trailer %%DocumentFonts: Helvetica 
ViennaCL-1.5.1-src/doc/manual/figures/note.eps000644 001750 001750 00000006656 12267307531 021147 0ustar00rupprupp000000 000000 %!PS-Adobe-2.0 EPSF-2.0 %%Title: sign.eps %%Creator: fig2dev Version 3.2 Patchlevel 1 %%CreationDate: Wed Sep 29 17:03:11 1999 %%For: klima@in8 (Robert Klima,CA0502,36030) %%Orientation: Portrait %%BoundingBox: 0 0 258 218 %%Pages: 0 %%BeginSetup %%EndSetup %%Magnification: 1.0000 %%EndComments /$F2psDict 200 dict def $F2psDict begin $F2psDict /mtrx matrix put /col-1 {0 setgray} bind def /col0 {0.000 0.000 0.000 srgb} bind def /col1 {0.000 0.000 1.000 srgb} bind def /col2 {0.000 1.000 0.000 srgb} bind def /col3 {0.000 1.000 1.000 srgb} bind def /col4 {1.000 0.000 0.000 srgb} bind def /col5 {1.000 0.000 1.000 srgb} bind def /col6 {1.000 1.000 0.000 srgb} bind def /col7 {1.000 1.000 1.000 srgb} bind def /col8 {0.000 0.000 0.560 srgb} bind def /col9 {0.000 0.000 0.690 srgb} bind def /col10 {0.000 0.000 0.820 srgb} bind def /col11 {0.530 0.810 1.000 srgb} bind def /col12 {0.000 0.560 0.000 srgb} bind def /col13 {0.000 0.690 0.000 srgb} bind def /col14 {0.000 0.820 0.000 srgb} bind def /col15 {0.000 0.560 0.560 srgb} bind def /col16 {0.000 0.690 0.690 srgb} bind def /col17 {0.000 0.820 0.820 srgb} bind def /col18 {0.560 0.000 0.000 srgb} bind def /col19 {0.690 0.000 0.000 srgb} bind def /col20 {0.820 0.000 0.000 srgb} bind def /col21 {0.560 0.000 0.560 srgb} bind def /col22 {0.690 0.000 0.690 srgb} bind def /col23 {0.820 0.000 0.820 srgb} bind def /col24 {0.500 0.190 0.000 srgb} bind def /col25 {0.630 0.250 0.000 srgb} bind def /col26 {0.750 0.380 0.000 srgb} bind def /col27 {1.000 0.500 0.500 srgb} bind def /col28 {1.000 0.630 0.630 srgb} bind def /col29 {1.000 0.750 0.750 srgb} bind def /col30 {1.000 0.880 0.880 srgb} bind def /col31 {1.000 0.840 0.000 srgb} bind def end save -140.0 557.0 translate 1 -1 scale /cp {closepath} bind def /ef {eofill} bind def /gr {grestore} bind def /gs {gsave} bind def /sa {save} bind def /rs {restore} bind def 
/l {lineto} bind def /m {moveto} bind def /rm {rmoveto} bind def /n {newpath} bind def /s {stroke} bind def /sh {show} bind def /slc {setlinecap} bind def /slj {setlinejoin} bind def /slw {setlinewidth} bind def /srgb {setrgbcolor} bind def /rot {rotate} bind def /sc {scale} bind def /sd {setdash} bind def /ff {findfont} bind def /sf {setfont} bind def /scf {scalefont} bind def /sw {stringwidth} bind def /tr {translate} bind def /tnt {dup dup currentrgbcolor 4 -2 roll dup 1 exch sub 3 -1 roll mul add 4 -2 roll dup 1 exch sub 3 -1 roll mul add 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb} bind def /shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul 4 -2 roll mul srgb} bind def /DrawEllipse { /endangle exch def /startangle exch def /yrad exch def /xrad exch def /y exch def /x exch def /savematrix mtrx currentmatrix def x y tr xrad yrad sc 0 0 1 startangle endangle arc closepath savematrix setmatrix } def /$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def /$F2psEnd {$F2psEnteredState restore end} def %%EndProlog $F2psBegin 10 setmiterlimit n -1000 9832 m -1000 -1000 l 7312 -1000 l 7312 9832 l cp clip 0.06299 0.06299 sc % Polyline 7.500 slw n 2700 8820 m 4500 5670 l 6300 8820 l cp gs 0.70 setgray ef gr gs col7 s gr % Polyline n 2250 8550 m 4050 5400 l 5850 8550 l cp gs col4 1.00 shd ef gr gs col4 s gr % Polyline n 2764 8247 m 4049 5999 l 5335 8247 l cp gs 0.90 setgray ef gr gs col4 s gr 120.000 slw % Ellipse n 4050 7920 90 90 0 360 DrawEllipse gs 0.00 setgray ef gr gs col0 s gr % Polyline 1 slc 285.000 slw n 4050 6660 m 4050 7470 l gs col0 s gr $F2psEnd rs ViennaCL-1.5.1-src/doc/manual/figures/matvec2.eps000644 001750 001750 00000044250 12267307531 021533 0ustar00rupprupp000000 000000 %!PS-Adobe-2.0 EPSF-2.0 %%Title: matvec.eps %%Creator: gnuplot 4.2 patchlevel 5 %%CreationDate: Wed Apr 7 18:59:17 2010 %%DocumentFonts: (atend) %%BoundingBox: 50 120 410 232 %%EndComments %%BeginProlog /gnudict 256 dict def gnudict begin % % The following 6 true/false 
flags may be edited by hand if required % The unit line width may also be changed % /Color true def /Blacktext false def /Solid false def /Dashlength 1 def /Landscape false def /Level1 false def /Rounded false def /TransparentPatterns false def /gnulinewidth 5.000 def /userlinewidth gnulinewidth def % /vshift -46 def /dl1 { 10.0 Dashlength mul mul Rounded { currentlinewidth 0.75 mul sub dup 0 le { pop 0.01 } if } if } def /dl2 { 10.0 Dashlength mul mul Rounded { currentlinewidth 0.75 mul add } if } def /hpt_ 31.5 def /vpt_ 31.5 def /hpt hpt_ def /vpt vpt_ def Level1 {} { /SDict 10 dict def systemdict /pdfmark known not { userdict /pdfmark systemdict /cleartomark get put } if SDict begin [ /Title (matvec.eps) /Subject (gnuplot plot) /Creator (gnuplot 4.2 patchlevel 5 ) /Author (Karl Rupp) % /Producer (gnuplot) % /Keywords () /CreationDate (Wed Apr 7 18:59:17 2010) /DOCINFO pdfmark end } ifelse % % Gnuplot Prolog Version 4.2 (August 2006) % /M {moveto} bind def /L {lineto} bind def /R {rmoveto} bind def /V {rlineto} bind def /N {newpath moveto} bind def /Z {closepath} bind def /C {setrgbcolor} bind def /f {rlineto fill} bind def /vpt2 vpt 2 mul def /hpt2 hpt 2 mul def /Lshow {currentpoint stroke M 0 vshift R Blacktext {gsave 0 setgray show grestore} {show} ifelse} def /Rshow {currentpoint stroke M dup stringwidth pop neg vshift R Blacktext {gsave 0 setgray show grestore} {show} ifelse} def /Cshow {currentpoint stroke M dup stringwidth pop -2 div vshift R Blacktext {gsave 0 setgray show grestore} {show} ifelse} def /UP {dup vpt_ mul /vpt exch def hpt_ mul /hpt exch def /hpt2 hpt 2 mul def /vpt2 vpt 2 mul def} def /DL {Color {setrgbcolor Solid {pop []} if 0 setdash} {pop pop pop 0 setgray Solid {pop []} if 0 setdash} ifelse} def /BL {stroke userlinewidth 2 mul setlinewidth Rounded {1 setlinejoin 1 setlinecap} if} def /AL {stroke userlinewidth 2 div setlinewidth Rounded {1 setlinejoin 1 setlinecap} if} def /UL {dup gnulinewidth mul /userlinewidth exch def dup 1 lt {pop 
1} if 10 mul /udl exch def} def /PL {stroke userlinewidth setlinewidth Rounded {1 setlinejoin 1 setlinecap} if} def % Default Line colors /LCw {1 1 1} def /LCb {0 0 0} def /LCa {0 0 0} def /LC0 {1 0 0} def /LC1 {0 1 0} def /LC2 {0 0 1} def /LC3 {1 0 1} def /LC4 {0 1 1} def /LC5 {1 1 0} def /LC6 {0 0 0} def /LC7 {1 0.3 0} def /LC8 {0.5 0.5 0.5} def % Default Line Types /LTw {PL [] 1 setgray} def /LTb {BL [] LCb DL} def /LTa {AL [1 udl mul 2 udl mul] 0 setdash LCa setrgbcolor} def /LT0 {PL [] LC0 DL} def /LT1 {PL [4 dl1 2 dl2] LC1 DL} def /LT2 {PL [2 dl1 3 dl2] LC2 DL} def /LT3 {PL [1 dl1 1.5 dl2] LC3 DL} def /LT4 {PL [6 dl1 2 dl2 1 dl1 2 dl2] LC4 DL} def /LT5 {PL [3 dl1 3 dl2 1 dl1 3 dl2] LC5 DL} def /LT6 {PL [2 dl1 2 dl2 2 dl1 6 dl2] LC6 DL} def /LT7 {PL [1 dl1 2 dl2 6 dl1 2 dl2 1 dl1 2 dl2] LC7 DL} def /LT8 {PL [2 dl1 2 dl2 2 dl1 2 dl2 2 dl1 2 dl2 2 dl1 4 dl2] LC8 DL} def /Pnt {stroke [] 0 setdash gsave 1 setlinecap M 0 0 V stroke grestore} def /Dia {stroke [] 0 setdash 2 copy vpt add M hpt neg vpt neg V hpt vpt neg V hpt vpt V hpt neg vpt V closepath stroke Pnt} def /Pls {stroke [] 0 setdash vpt sub M 0 vpt2 V currentpoint stroke M hpt neg vpt neg R hpt2 0 V stroke } def /Box {stroke [] 0 setdash 2 copy exch hpt sub exch vpt add M 0 vpt2 neg V hpt2 0 V 0 vpt2 V hpt2 neg 0 V closepath stroke Pnt} def /Crs {stroke [] 0 setdash exch hpt sub exch vpt add M hpt2 vpt2 neg V currentpoint stroke M hpt2 neg 0 R hpt2 vpt2 V stroke} def /TriU {stroke [] 0 setdash 2 copy vpt 1.12 mul add M hpt neg vpt -1.62 mul V hpt 2 mul 0 V hpt neg vpt 1.62 mul V closepath stroke Pnt} def /Star {2 copy Pls Crs} def /BoxF {stroke [] 0 setdash exch hpt sub exch vpt add M 0 vpt2 neg V hpt2 0 V 0 vpt2 V hpt2 neg 0 V closepath fill} def /TriUF {stroke [] 0 setdash vpt 1.12 mul add M hpt neg vpt -1.62 mul V hpt 2 mul 0 V hpt neg vpt 1.62 mul V closepath fill} def /TriD {stroke [] 0 setdash 2 copy vpt 1.12 mul sub M hpt neg vpt 1.62 mul V hpt 2 mul 0 V hpt neg vpt -1.62 mul V closepath stroke 
Pnt} def /TriDF {stroke [] 0 setdash vpt 1.12 mul sub M hpt neg vpt 1.62 mul V hpt 2 mul 0 V hpt neg vpt -1.62 mul V closepath fill} def /DiaF {stroke [] 0 setdash vpt add M hpt neg vpt neg V hpt vpt neg V hpt vpt V hpt neg vpt V closepath fill} def /Pent {stroke [] 0 setdash 2 copy gsave translate 0 hpt M 4 {72 rotate 0 hpt L} repeat closepath stroke grestore Pnt} def /PentF {stroke [] 0 setdash gsave translate 0 hpt M 4 {72 rotate 0 hpt L} repeat closepath fill grestore} def /Circle {stroke [] 0 setdash 2 copy hpt 0 360 arc stroke Pnt} def /CircleF {stroke [] 0 setdash hpt 0 360 arc fill} def /C0 {BL [] 0 setdash 2 copy moveto vpt 90 450 arc} bind def /C1 {BL [] 0 setdash 2 copy moveto 2 copy vpt 0 90 arc closepath fill vpt 0 360 arc closepath} bind def /C2 {BL [] 0 setdash 2 copy moveto 2 copy vpt 90 180 arc closepath fill vpt 0 360 arc closepath} bind def /C3 {BL [] 0 setdash 2 copy moveto 2 copy vpt 0 180 arc closepath fill vpt 0 360 arc closepath} bind def /C4 {BL [] 0 setdash 2 copy moveto 2 copy vpt 180 270 arc closepath fill vpt 0 360 arc closepath} bind def /C5 {BL [] 0 setdash 2 copy moveto 2 copy vpt 0 90 arc 2 copy moveto 2 copy vpt 180 270 arc closepath fill vpt 0 360 arc} bind def /C6 {BL [] 0 setdash 2 copy moveto 2 copy vpt 90 270 arc closepath fill vpt 0 360 arc closepath} bind def /C7 {BL [] 0 setdash 2 copy moveto 2 copy vpt 0 270 arc closepath fill vpt 0 360 arc closepath} bind def /C8 {BL [] 0 setdash 2 copy moveto 2 copy vpt 270 360 arc closepath fill vpt 0 360 arc closepath} bind def /C9 {BL [] 0 setdash 2 copy moveto 2 copy vpt 270 450 arc closepath fill vpt 0 360 arc closepath} bind def /C10 {BL [] 0 setdash 2 copy 2 copy moveto vpt 270 360 arc closepath fill 2 copy moveto 2 copy vpt 90 180 arc closepath fill vpt 0 360 arc closepath} bind def /C11 {BL [] 0 setdash 2 copy moveto 2 copy vpt 0 180 arc closepath fill 2 copy moveto 2 copy vpt 270 360 arc closepath fill vpt 0 360 arc closepath} bind def /C12 {BL [] 0 setdash 2 copy moveto 2 copy 
vpt 180 360 arc closepath fill vpt 0 360 arc closepath} bind def /C13 {BL [] 0 setdash 2 copy moveto 2 copy vpt 0 90 arc closepath fill 2 copy moveto 2 copy vpt 180 360 arc closepath fill vpt 0 360 arc closepath} bind def /C14 {BL [] 0 setdash 2 copy moveto 2 copy vpt 90 360 arc closepath fill vpt 0 360 arc} bind def /C15 {BL [] 0 setdash 2 copy vpt 0 360 arc closepath fill vpt 0 360 arc closepath} bind def /Rec {newpath 4 2 roll moveto 1 index 0 rlineto 0 exch rlineto neg 0 rlineto closepath} bind def /Square {dup Rec} bind def /Bsquare {vpt sub exch vpt sub exch vpt2 Square} bind def /S0 {BL [] 0 setdash 2 copy moveto 0 vpt rlineto BL Bsquare} bind def /S1 {BL [] 0 setdash 2 copy vpt Square fill Bsquare} bind def /S2 {BL [] 0 setdash 2 copy exch vpt sub exch vpt Square fill Bsquare} bind def /S3 {BL [] 0 setdash 2 copy exch vpt sub exch vpt2 vpt Rec fill Bsquare} bind def /S4 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt Square fill Bsquare} bind def /S5 {BL [] 0 setdash 2 copy 2 copy vpt Square fill exch vpt sub exch vpt sub vpt Square fill Bsquare} bind def /S6 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt vpt2 Rec fill Bsquare} bind def /S7 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt vpt2 Rec fill 2 copy vpt Square fill Bsquare} bind def /S8 {BL [] 0 setdash 2 copy vpt sub vpt Square fill Bsquare} bind def /S9 {BL [] 0 setdash 2 copy vpt sub vpt vpt2 Rec fill Bsquare} bind def /S10 {BL [] 0 setdash 2 copy vpt sub vpt Square fill 2 copy exch vpt sub exch vpt Square fill Bsquare} bind def /S11 {BL [] 0 setdash 2 copy vpt sub vpt Square fill 2 copy exch vpt sub exch vpt2 vpt Rec fill Bsquare} bind def /S12 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill Bsquare} bind def /S13 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill 2 copy vpt Square fill Bsquare} bind def /S14 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill 2 copy exch vpt sub exch vpt Square fill Bsquare} bind def /S15 
{BL [] 0 setdash 2 copy Bsquare fill Bsquare} bind def /D0 {gsave translate 45 rotate 0 0 S0 stroke grestore} bind def /D1 {gsave translate 45 rotate 0 0 S1 stroke grestore} bind def /D2 {gsave translate 45 rotate 0 0 S2 stroke grestore} bind def /D3 {gsave translate 45 rotate 0 0 S3 stroke grestore} bind def /D4 {gsave translate 45 rotate 0 0 S4 stroke grestore} bind def /D5 {gsave translate 45 rotate 0 0 S5 stroke grestore} bind def /D6 {gsave translate 45 rotate 0 0 S6 stroke grestore} bind def /D7 {gsave translate 45 rotate 0 0 S7 stroke grestore} bind def /D8 {gsave translate 45 rotate 0 0 S8 stroke grestore} bind def /D9 {gsave translate 45 rotate 0 0 S9 stroke grestore} bind def /D10 {gsave translate 45 rotate 0 0 S10 stroke grestore} bind def /D11 {gsave translate 45 rotate 0 0 S11 stroke grestore} bind def /D12 {gsave translate 45 rotate 0 0 S12 stroke grestore} bind def /D13 {gsave translate 45 rotate 0 0 S13 stroke grestore} bind def /D14 {gsave translate 45 rotate 0 0 S14 stroke grestore} bind def /D15 {gsave translate 45 rotate 0 0 S15 stroke grestore} bind def /DiaE {stroke [] 0 setdash vpt add M hpt neg vpt neg V hpt vpt neg V hpt vpt V hpt neg vpt V closepath stroke} def /BoxE {stroke [] 0 setdash exch hpt sub exch vpt add M 0 vpt2 neg V hpt2 0 V 0 vpt2 V hpt2 neg 0 V closepath stroke} def /TriUE {stroke [] 0 setdash vpt 1.12 mul add M hpt neg vpt -1.62 mul V hpt 2 mul 0 V hpt neg vpt 1.62 mul V closepath stroke} def /TriDE {stroke [] 0 setdash vpt 1.12 mul sub M hpt neg vpt 1.62 mul V hpt 2 mul 0 V hpt neg vpt -1.62 mul V closepath stroke} def /PentE {stroke [] 0 setdash gsave translate 0 hpt M 4 {72 rotate 0 hpt L} repeat closepath stroke grestore} def /CircE {stroke [] 0 setdash hpt 0 360 arc stroke} def /Opaque {gsave closepath 1 setgray fill grestore 0 setgray closepath} def /DiaW {stroke [] 0 setdash vpt add M hpt neg vpt neg V hpt vpt neg V hpt vpt V hpt neg vpt V Opaque stroke} def /BoxW {stroke [] 0 setdash exch hpt sub exch vpt add M 0 
vpt2 neg V hpt2 0 V 0 vpt2 V hpt2 neg 0 V Opaque stroke} def /TriUW {stroke [] 0 setdash vpt 1.12 mul add M hpt neg vpt -1.62 mul V hpt 2 mul 0 V hpt neg vpt 1.62 mul V Opaque stroke} def /TriDW {stroke [] 0 setdash vpt 1.12 mul sub M hpt neg vpt 1.62 mul V hpt 2 mul 0 V hpt neg vpt -1.62 mul V Opaque stroke} def /PentW {stroke [] 0 setdash gsave translate 0 hpt M 4 {72 rotate 0 hpt L} repeat Opaque stroke grestore} def /CircW {stroke [] 0 setdash hpt 0 360 arc Opaque stroke} def /BoxFill {gsave Rec 1 setgray fill grestore} def /Density { /Fillden exch def currentrgbcolor /ColB exch def /ColG exch def /ColR exch def /ColR ColR Fillden mul Fillden sub 1 add def /ColG ColG Fillden mul Fillden sub 1 add def /ColB ColB Fillden mul Fillden sub 1 add def ColR ColG ColB setrgbcolor} def /BoxColFill {gsave Rec PolyFill} def /PolyFill {gsave Density fill grestore grestore} def /h {rlineto rlineto rlineto gsave closepath fill grestore} bind def % % PostScript Level 1 Pattern Fill routine for rectangles % Usage: x y w h s a XX PatternFill % x,y = lower left corner of box to be filled % w,h = width and height of box % a = angle in degrees between lines and x-axis % XX = 0/1 for no/yes cross-hatch % /PatternFill {gsave /PFa [ 9 2 roll ] def PFa 0 get PFa 2 get 2 div add PFa 1 get PFa 3 get 2 div add translate PFa 2 get -2 div PFa 3 get -2 div PFa 2 get PFa 3 get Rec gsave 1 setgray fill grestore clip currentlinewidth 0.5 mul setlinewidth /PFs PFa 2 get dup mul PFa 3 get dup mul add sqrt def 0 0 M PFa 5 get rotate PFs -2 div dup translate 0 1 PFs PFa 4 get div 1 add floor cvi {PFa 4 get mul 0 M 0 PFs V} for 0 PFa 6 get ne { 0 1 PFs PFa 4 get div 1 add floor cvi {PFa 4 get mul 0 2 1 roll M PFs 0 V} for } if stroke grestore} def % /languagelevel where {pop languagelevel} {1} ifelse 2 lt {/InterpretLevel1 true def} {/InterpretLevel1 Level1 def} ifelse % % PostScript level 2 pattern fill definitions % /Level2PatternFill { /Tile8x8 {/PaintType 2 /PatternType 1 /TilingType 1 /BBox [0 
0 8 8] /XStep 8 /YStep 8} bind def /KeepColor {currentrgbcolor [/Pattern /DeviceRGB] setcolorspace} bind def << Tile8x8 /PaintProc {0.5 setlinewidth pop 0 0 M 8 8 L 0 8 M 8 0 L stroke} >> matrix makepattern /Pat1 exch def << Tile8x8 /PaintProc {0.5 setlinewidth pop 0 0 M 8 8 L 0 8 M 8 0 L stroke 0 4 M 4 8 L 8 4 L 4 0 L 0 4 L stroke} >> matrix makepattern /Pat2 exch def << Tile8x8 /PaintProc {0.5 setlinewidth pop 0 0 M 0 8 L 8 8 L 8 0 L 0 0 L fill} >> matrix makepattern /Pat3 exch def << Tile8x8 /PaintProc {0.5 setlinewidth pop -4 8 M 8 -4 L 0 12 M 12 0 L stroke} >> matrix makepattern /Pat4 exch def << Tile8x8 /PaintProc {0.5 setlinewidth pop -4 0 M 8 12 L 0 -4 M 12 8 L stroke} >> matrix makepattern /Pat5 exch def << Tile8x8 /PaintProc {0.5 setlinewidth pop -2 8 M 4 -4 L 0 12 M 8 -4 L 4 12 M 10 0 L stroke} >> matrix makepattern /Pat6 exch def << Tile8x8 /PaintProc {0.5 setlinewidth pop -2 0 M 4 12 L 0 -4 M 8 12 L 4 -4 M 10 8 L stroke} >> matrix makepattern /Pat7 exch def << Tile8x8 /PaintProc {0.5 setlinewidth pop 8 -2 M -4 4 L 12 0 M -4 8 L 12 4 M 0 10 L stroke} >> matrix makepattern /Pat8 exch def << Tile8x8 /PaintProc {0.5 setlinewidth pop 0 -2 M 12 4 L -4 0 M 12 8 L -4 4 M 8 10 L stroke} >> matrix makepattern /Pat9 exch def /Pattern1 {PatternBgnd KeepColor Pat1 setpattern} bind def /Pattern2 {PatternBgnd KeepColor Pat2 setpattern} bind def /Pattern3 {PatternBgnd KeepColor Pat3 setpattern} bind def /Pattern4 {PatternBgnd KeepColor Landscape {Pat5} {Pat4} ifelse setpattern} bind def /Pattern5 {PatternBgnd KeepColor Landscape {Pat4} {Pat5} ifelse setpattern} bind def /Pattern6 {PatternBgnd KeepColor Landscape {Pat9} {Pat6} ifelse setpattern} bind def /Pattern7 {PatternBgnd KeepColor Landscape {Pat8} {Pat7} ifelse setpattern} bind def } def % % %End of PostScript Level 2 code % /PatternBgnd { TransparentPatterns {} {gsave 1 setgray fill grestore} ifelse } def % % Substitute for Level 2 pattern fill codes with % grayscale if Level 2 support is not selected. 
% /Level1PatternFill { /Pattern1 {0.250 Density} bind def /Pattern2 {0.500 Density} bind def /Pattern3 {0.750 Density} bind def /Pattern4 {0.125 Density} bind def /Pattern5 {0.375 Density} bind def /Pattern6 {0.625 Density} bind def /Pattern7 {0.875 Density} bind def } def % % Now test for support of Level 2 code % Level1 {Level1PatternFill} {Level2PatternFill} ifelse % /Symbol-Oblique /Symbol findfont [1 0 .167 1 0 0] makefont dup length dict begin {1 index /FID eq {pop pop} {def} ifelse} forall currentdict end definefont pop /MFshow { { dup 5 get 3 ge { 5 get 3 eq {gsave} {grestore} ifelse } {dup dup 0 get findfont exch 1 get scalefont setfont [ currentpoint ] exch dup 2 get 0 exch R dup 5 get 2 ne {dup dup 6 get exch 4 get {show} {stringwidth pop 0 R} ifelse }if dup 5 get 0 eq {dup 3 get {2 get neg 0 exch R pop} {pop aload pop M} ifelse} {dup 5 get 1 eq {dup 2 get exch dup 3 get exch 6 get stringwidth pop -2 div dup 0 R} {dup 6 get stringwidth pop -2 div 0 R 6 get show 2 index {aload pop M neg 3 -1 roll neg R pop pop} {pop pop pop pop aload pop M} ifelse }ifelse }ifelse } ifelse } forall} bind def /MFwidth {0 exch { dup 5 get 3 ge { 5 get 3 eq { 0 } { pop } ifelse } {dup 3 get{dup dup 0 get findfont exch 1 get scalefont setfont 6 get stringwidth pop add} {pop} ifelse} ifelse} forall} bind def /MLshow { currentpoint stroke M 0 exch R Blacktext {gsave 0 setgray MFshow grestore} {MFshow} ifelse } bind def /MRshow { currentpoint stroke M exch dup MFwidth neg 3 -1 roll R Blacktext {gsave 0 setgray MFshow grestore} {MFshow} ifelse } bind def /MCshow { currentpoint stroke M exch dup MFwidth -2 div 3 -1 roll R Blacktext {gsave 0 setgray MFshow grestore} {MFshow} ifelse } bind def /XYsave { [( ) 1 2 true false 3 ()] } bind def /XYrestore { [( ) 1 2 true false 4 ()] } bind def end %%EndProlog gnudict begin gsave 50 50 translate 0.050 0.050 scale 0 setgray newpath (Helvetica) findfont 140 scalefont setfont 1.000 UL LTb 938 1819 M 63 0 V 5989 0 R -63 0 V stroke 854 1819 M [ 
[(Helvetica) 140.0 0.0 true true 0 ( 0.01)] ] -46.7 MRshow 1.000 UL LTb 938 2047 M 31 0 V 6021 0 R -31 0 V 938 2180 M 31 0 V 6021 0 R -31 0 V 938 2275 M 31 0 V 6021 0 R -31 0 V 938 2348 M 31 0 V 6021 0 R -31 0 V 938 2408 M 31 0 V 6021 0 R -31 0 V 938 2459 M 31 0 V 6021 0 R -31 0 V 938 2503 M 31 0 V 6021 0 R -31 0 V 938 2541 M 31 0 V 6021 0 R -31 0 V 938 2576 M 63 0 V 5989 0 R -63 0 V stroke 854 2576 M [ [(Helvetica) 140.0 0.0 true true 0 ( 0.1)] ] -46.7 MRshow 1.000 UL LTb 938 2804 M 31 0 V 6021 0 R -31 0 V 938 2937 M 31 0 V 6021 0 R -31 0 V 938 3032 M 31 0 V 6021 0 R -31 0 V 938 3105 M 31 0 V 6021 0 R -31 0 V 938 3165 M 31 0 V 6021 0 R -31 0 V 938 3216 M 31 0 V 6021 0 R -31 0 V 938 3260 M 31 0 V 6021 0 R -31 0 V 938 3298 M 31 0 V 6021 0 R -31 0 V 938 3333 M 63 0 V 5989 0 R -63 0 V stroke 854 3333 M [ [(Helvetica) 140.0 0.0 true true 0 ( 1)] ] -46.7 MRshow 1.000 UL LTb 3964 1819 M 0 63 V 0 1451 R 0 -63 V stroke 3964 1679 M [ [(Helvetica) 140.0 0.0 true true 0 (1)] ] -46.7 MCshow 1.000 UL LTb 1.000 UL LTb 938 3333 N 0 -1514 V 6052 0 V 0 1514 V -6052 0 V Z stroke LCb setrgbcolor 280 2576 M currentpoint gsave translate 90 rotate 0 0 moveto [ [(Helvetica) 140.0 0.0 true true 0 (Execution Time \(ms\))] ] -46.7 MCshow grestore LTb 1.000 UP 1.000 UL LTb 1.000 UL LT0 LTb 1274 3200 M [ [(Helvetica) 140.0 0.0 true true 0 (CPU)] ] -46.7 MRshow LT0 1.000 1358 3165 399 70 BoxColFill LTb 1358 3165 N 399 0 V 0 70 V -399 0 V 0 -70 V Z stroke LT0 1.000 2451 1819 2018 1336 BoxColFill LTb 2451 1819 N 0 1335 V 2017 0 V 0 -1335 V -2017 0 V Z stroke LT0 1.000 UL LT1 LTb 1274 3060 M [ [(Helvetica) 140.0 0.0 true true 0 (GPU)] ] -46.7 MRshow LT1 1.000 1358 3025 399 70 BoxColFill LTb 1358 3025 N 399 0 V 0 70 V -399 0 V 0 -70 V Z stroke LT1 1.000 3460 1819 2018 723 BoxColFill LTb 3460 1819 N 0 722 V 2017 0 V 0 -722 V -2017 0 V Z stroke LT1 1.000 UL LTb 938 3333 N 0 -1514 V 6052 0 V 0 1514 V -6052 0 V Z stroke 1.000 UP 1.000 UL LTb stroke grestore end showpage %%Trailer %%DocumentFonts: 
Helvetica ViennaCL-1.5.1-src/doc/manual/figures/tip.eps000644 001750 001750 00000022734 12267307531 020771 0ustar00rupprupp000000 000000 %!PS-Adobe-2.0 EPSF-2.0 % $Id: tip.eps,v 1.1.1.1 2001/03/19 16:35:04 uid330 Exp $ %%Title: tip.eps %%Creator: fig2dev Version 3.2 Patchlevel 0-beta3 %%CreationDate: Fri Oct 16 16:26:28 1998 %%For: grasser@a64.iue.tuwien.ac.at (Tibor Grasser,36023) %%Orientation: Portrait %%BoundingBox: 0 0 316 300 %%Pages: 0 %%BeginSetup %%EndSetup %%Magnification: 1.0000 %%EndComments /$F2psDict 200 dict def $F2psDict begin $F2psDict /mtrx matrix put /col-1 {0 setgray} bind def /col0 {0.000 0.000 0.000 srgb} bind def /col1 {0.000 0.000 1.000 srgb} bind def /col2 {0.000 1.000 0.000 srgb} bind def /col3 {0.000 1.000 1.000 srgb} bind def /col4 {1.000 0.000 0.000 srgb} bind def /col5 {1.000 0.000 1.000 srgb} bind def /col6 {1.000 1.000 0.000 srgb} bind def /col7 {1.000 1.000 1.000 srgb} bind def /col8 {0.000 0.000 0.560 srgb} bind def /col9 {0.000 0.000 0.690 srgb} bind def /col10 {0.000 0.000 0.820 srgb} bind def /col11 {0.530 0.810 1.000 srgb} bind def /col12 {0.000 0.560 0.000 srgb} bind def /col13 {0.000 0.690 0.000 srgb} bind def /col14 {0.000 0.820 0.000 srgb} bind def /col15 {0.000 0.560 0.560 srgb} bind def /col16 {0.000 0.690 0.690 srgb} bind def /col17 {0.000 0.820 0.820 srgb} bind def /col18 {0.560 0.000 0.000 srgb} bind def /col19 {0.690 0.000 0.000 srgb} bind def /col20 {0.820 0.000 0.000 srgb} bind def /col21 {0.560 0.000 0.560 srgb} bind def /col22 {0.690 0.000 0.690 srgb} bind def /col23 {0.820 0.000 0.820 srgb} bind def /col24 {0.500 0.190 0.000 srgb} bind def /col25 {0.630 0.250 0.000 srgb} bind def /col26 {0.750 0.380 0.000 srgb} bind def /col27 {1.000 0.500 0.500 srgb} bind def /col28 {1.000 0.630 0.630 srgb} bind def /col29 {1.000 0.750 0.750 srgb} bind def /col30 {1.000 0.880 0.880 srgb} bind def /col31 {1.000 0.840 0.000 srgb} bind def end save -352.0 310.0 translate 1 -1 scale /cp {closepath} bind def /ef {eofill} bind def /gr 
{grestore} bind def /gs {gsave} bind def /sa {save} bind def /rs {restore} bind def /l {lineto} bind def /m {moveto} bind def /rm {rmoveto} bind def /n {newpath} bind def /s {stroke} bind def /sh {show} bind def /slc {setlinecap} bind def /slj {setlinejoin} bind def /slw {setlinewidth} bind def /srgb {setrgbcolor} bind def /rot {rotate} bind def /sc {scale} bind def /sd {setdash} bind def /ff {findfont} bind def /sf {setfont} bind def /scf {scalefont} bind def /sw {stringwidth} bind def /tr {translate} bind def /tnt {dup dup currentrgbcolor 4 -2 roll dup 1 exch sub 3 -1 roll mul add 4 -2 roll dup 1 exch sub 3 -1 roll mul add 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb} bind def /shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul 4 -2 roll mul srgb} bind def /$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def /$F2psEnd {$F2psEnteredState restore end} def %%EndProlog $F2psBegin 10 setmiterlimit n -1000 5917 m -1000 -1000 l 11598 -1000 l 11598 5917 l cp clip 0.06299 0.06299 sc % Polyline 15.000 slw n 7523 1685 m 7501 1697 l 7479 1709 l 7458 1723 l 7437 1737 l 7417 1752 l 7396 1768 l 7376 1784 l 7356 1801 l 7336 1818 l 7317 1835 l 7297 1853 l 7278 1871 l 7259 1889 l 7240 1907 l 7221 1926 l 7203 1945 l 7185 1964 l 7167 1984 l 7150 2005 l 7134 2026 l 7118 2047 l 7103 2070 l 7090 2092 l 7077 2114 l 7065 2137 l 7053 2161 l 7042 2186 l 7030 2212 l 7019 2238 l 7008 2265 l 6997 2292 l 6986 2320 l 6975 2348 l 6964 2376 l 6954 2404 l 6944 2432 l 6934 2460 l 6925 2488 l 6917 2516 l 6909 2543 l 6901 2570 l 6895 2597 l 6889 2623 l 6884 2649 l 6880 2675 l 6878 2700 l 6877 2725 l 6877 2750 l 6878 2776 l 6879 2801 l 6882 2826 l 6886 2851 l 6890 2876 l 6895 2902 l 6900 2927 l 6906 2952 l 6912 2978 l 6918 3003 l 6925 3028 l 6932 3053 l 6939 3078 l 6946 3103 l 6953 3127 l 6961 3151 l 6969 3175 l 6977 3198 l 6985 3220 l 6994 3243 l 7003 3264 l 7013 3285 l 7025 3307 l 7037 3328 l 7051 3349 l 7065 3369 l 7080 3388 l 7096 3407 l 7113 3425 l 7130 3443 l 7147 3460 l 7165 
3477 l 7182 3494 l 7200 3511 l 7218 3527 l 7236 3544 l 7254 3561 l 7272 3579 l 7289 3597 l 7307 3615 l 7323 3633 l 7340 3652 l 7357 3671 l 7373 3690 l 7388 3708 l 7403 3726 l 7418 3745 l 7433 3764 l 7448 3783 l 7463 3803 l 7478 3823 l 7493 3844 l 7508 3864 l 7523 3885 l 7538 3907 l 7553 3928 l 7568 3949 l 7583 3969 l 7598 3990 l 7613 4010 l 7628 4029 l 7643 4048 l 7658 4066 l 7673 4083 l 7688 4099 l 7703 4113 l 7718 4127 l 7733 4140 l 7753 4155 l 7773 4169 l 7793 4181 l 7813 4192 l 7833 4202 l 7853 4211 l 7873 4220 l 7893 4228 l 7913 4235 l 7933 4242 l 7953 4248 l 7973 4254 l 7993 4260 l 8013 4265 l 8033 4269 l 8053 4272 l 8073 4274 l 8093 4275 l 8113 4274 l 8133 4272 l 8153 4269 l 8173 4264 l 8193 4259 l 8213 4254 l 8234 4247 l 8254 4241 l 8274 4234 l 8294 4226 l 8314 4218 l 8334 4209 l 8354 4200 l 8374 4190 l 8394 4179 l 8414 4168 l 8433 4155 l 8453 4140 l 8469 4127 l 8485 4112 l 8500 4097 l 8516 4080 l 8531 4063 l 8547 4044 l 8562 4025 l 8577 4005 l 8592 3985 l 8607 3964 l 8622 3943 l 8637 3922 l 8652 3900 l 8667 3879 l 8683 3858 l 8698 3838 l 8713 3817 l 8729 3797 l 8745 3777 l 8761 3758 l 8777 3739 l 8793 3720 l 8809 3701 l 8826 3682 l 8843 3664 l 8861 3646 l 8879 3628 l 8897 3610 l 8916 3593 l 8935 3575 l 8954 3558 l 8973 3541 l 8992 3524 l 9011 3507 l 9030 3489 l 9049 3472 l 9067 3454 l 9085 3435 l 9101 3416 l 9118 3396 l 9133 3376 l 9147 3355 l 9161 3333 l 9173 3310 l 9184 3288 l 9193 3266 l 9203 3243 l 9212 3220 l 9220 3195 l 9229 3171 l 9237 3145 l 9244 3120 l 9252 3094 l 9260 3068 l 9267 3041 l 9274 3014 l 9281 2988 l 9287 2961 l 9293 2934 l 9299 2907 l 9304 2881 l 9309 2855 l 9312 2828 l 9315 2802 l 9318 2777 l 9319 2751 l 9319 2725 l 9318 2700 l 9316 2675 l 9312 2649 l 9308 2623 l 9302 2598 l 9295 2572 l 9288 2546 l 9280 2519 l 9271 2493 l 9262 2466 l 9252 2439 l 9242 2413 l 9232 2386 l 9221 2359 l 9210 2333 l 9199 2306 l 9188 2280 l 9177 2255 l 9165 2230 l 9154 2205 l 9142 2181 l 9130 2157 l 9118 2134 l 9106 2112 l 9093 2090 l 9079 2069 l 9065 2048 l 
9051 2028 l 9036 2009 l 9020 1989 l 9005 1970 l 8989 1951 l 8972 1932 l 8956 1914 l 8939 1895 l 8922 1877 l 8905 1858 l 8888 1840 l 8871 1823 l 8853 1805 l 8835 1788 l 8817 1772 l 8799 1756 l 8781 1740 l 8762 1725 l 8743 1711 l 8724 1698 l 8704 1686 l 8683 1675 l 8662 1665 l 8640 1656 l 8617 1648 l 8593 1641 l 8568 1635 l 8541 1629 l 8514 1624 l 8486 1620 l 8457 1617 l 8428 1613 l 8399 1610 l 8369 1608 l 8340 1605 l 8311 1603 l 8282 1601 l 8255 1598 l 8229 1596 l 8204 1594 l 8181 1591 l 8159 1589 l 8139 1587 l 8122 1584 l 8106 1582 l 8093 1580 l 8086 1579 l 8079 1578 l 8073 1577 l 8069 1576 l 8065 1576 l 8062 1575 l 8060 1575 l 8059 1574 l 8061 1573 l 8063 1573 l 8065 1573 l 8068 1573 l 8072 1573 l 8076 1573 l 8080 1573 l 8085 1573 l 8089 1573 l 8094 1574 l 8098 1574 l 8103 1574 l 8107 1574 l 8110 1575 l 8114 1575 l 8116 1575 l 8118 1576 l 8120 1576 l 8120 1577 l 8119 1578 l 8118 1579 l 8115 1580 l 8111 1580 l 8107 1581 l 8101 1582 l 8095 1584 l 8088 1585 l 8074 1588 l 8057 1590 l 8039 1593 l 8018 1596 l 7994 1599 l 7969 1602 l 7943 1605 l 7915 1608 l 7886 1611 l 7855 1614 l 7825 1617 l 7794 1620 l 7764 1624 l 7733 1628 l 7703 1632 l 7675 1637 l 7647 1643 l 7620 1650 l 7594 1657 l 7569 1665 l 7546 1675 l cp gs col6 1.00 shd ef gr gs col0 s gr % Polyline 7.500 slw n 7733 4140 m 8453 4140 l 8453 4770 l 7733 4770 l cp gs col7 0.50 shd ef gr gs col0 s gr % Polyline 30.000 slw n 8228 4140 m 8228 3690 l 8588 2925 l gs col0 s gr % Polyline 1 slc 90.000 slw n 8093 1124 m 8093 240 l gs col6 s gr % Polyline n 8648 1214 m 9008 367 l gs col6 s gr % Polyline n 9555 2159 m 10350 1679 l gs col6 s gr % Polyline n 9683 2767 m 10523 2789 l gs col6 s gr % Polyline 0 slc 7.500 slw n 7823 4770 m 8363 4770 l 8363 4905 l 7823 4905 l cp gs 0.00 setgray ef gr gs col0 s gr % Polyline 1 slc 90.000 slw n 9503 3487 m 10079 4000 l gs col6 s gr % Polyline 0 slc 30.000 slw n 7950 4157 m 7950 3707 l 7590 2942 l gs col0 s gr % Polyline 1 slc 90.000 slw n 9180 1529 m 9795 840 l gs col6 s gr % 
Polyline n 7538 1222 m 7185 375 l gs col6 s gr % Polyline n 6631 2167 m 5836 1687 l gs col6 s gr % Polyline n 6503 2775 m 5663 2797 l gs col6 s gr % Polyline n 6683 3495 m 6107 4008 l gs col6 s gr % Polyline n 7006 1537 m 6391 848 l gs col6 s gr % Polyline 0 slc 7.500 slw n 7598 2970 m 7598 2969 l 7598 2966 l 7599 2957 l 7600 2942 l 7602 2922 l 7605 2897 l 7608 2869 l 7611 2842 l 7615 2816 l 7619 2793 l 7624 2774 l 7629 2760 l 7636 2750 l 7643 2745 l 7652 2744 l 7662 2747 l 7673 2753 l 7686 2764 l 7700 2778 l 7714 2793 l 7729 2810 l 7744 2827 l 7758 2843 l 7773 2857 l 7786 2868 l 7799 2876 l 7811 2880 l 7823 2880 l 7834 2877 l 7845 2870 l 7855 2859 l 7865 2845 l 7874 2827 l 7883 2808 l 7893 2788 l 7902 2768 l 7911 2749 l 7920 2732 l 7929 2718 l 7939 2708 l 7948 2702 l 7958 2700 l 7968 2702 l 7977 2708 l 7987 2719 l 7997 2733 l 8006 2751 l 8016 2771 l 8026 2792 l 8035 2813 l 8045 2833 l 8054 2851 l 8064 2865 l 8074 2874 l 8083 2879 l 8093 2880 l 8101 2877 l 8110 2870 l 8118 2860 l 8127 2846 l 8135 2829 l 8144 2809 l 8152 2788 l 8161 2765 l 8169 2743 l 8177 2722 l 8186 2702 l 8194 2686 l 8203 2672 l 8211 2663 l 8220 2657 l 8228 2655 l 8236 2657 l 8245 2663 l 8253 2672 l 8262 2686 l 8270 2702 l 8279 2722 l 8287 2743 l 8296 2765 l 8304 2788 l 8312 2809 l 8321 2829 l 8329 2846 l 8338 2860 l 8346 2870 l 8355 2877 l 8363 2880 l 8373 2879 l 8382 2874 l 8392 2864 l 8402 2849 l 8412 2831 l 8422 2810 l 8433 2788 l 8443 2766 l 8453 2745 l 8462 2727 l 8472 2713 l 8481 2704 l 8490 2699 l 8498 2700 l 8505 2705 l 8512 2713 l 8519 2727 l 8526 2744 l 8533 2766 l 8541 2791 l 8549 2819 l 8557 2849 l 8565 2878 l 8572 2905 l 8578 2928 l 8582 2946 l 8585 2959 l 8587 2967 l 8588 2970 l gs col0 s gr $F2psEnd rs ViennaCL-1.5.1-src/doc/manual/benchmarks.tex000644 001750 001750 00000007455 12235435247 020663 0ustar00rupprupp000000 000000 \chapter{Benchmark Results} We have compared the performance gain of {\ViennaCL} with standard CPU implementations using a single core. 
The code used for the benchmarks can be found in the folder \texttt{examples/benchmark/} within the source-release of {\ViennaCL}. Results are grouped by computational complexity and can be found in the subsequent sections. \begin{center} \begin{tabular}{|l|l|} \hline CPU & AMD Phenom II X4-965 \\ RAM & 8 GB \\ OS & Funtoo Linux 64 bit \\ \hline Kernel for AMD cards: & 2.6.33 \\ AMD driver version: & 10.4 \\ \hline Kernel for Nvidia cards: & 2.6.34 \\ Nvidia driver version: & 195.36.24 \\ \hline {\ViennaCL} version & 1.0.0 \\ \hline \end{tabular} \end{center} \NOTE{Compute kernels are not fully optimized yet, results are likely to improve considerably in future releases of {\ViennaCL}} \TIP{Due to only partial support of double precision by GPUs from ATI at the time of these benchmarks, double precision arithmetics is not included, cf.~Tab.~\ref{tab:double-precision-GPUs}.} \NOTE{When benchmarking {\ViennaCL}, first a dummy call to the functionality of interest should be issued prior to taking timings. Otherwise, benchmark results include the just-in-time compilation, which is a constant independent of the data size.} \section{Vector Operations} Benchmarks for the addition of two vectors and the computation of inner products are shown in Tab.~\ref{tab:vectorbench}. \begin{table}[tb] \begin{center} \begin{tabular}{l|c|c|c|c} Compute Device & add, float & add, double & prod, float & prod, double\\ \hline CPU & 0.174 & 0.347 & 0.408 & 0.430 \\ NVIDIA GTX 260 & 0.087 & 0.089 & 0.044 & 0.072\\ NVIDIA GTX 470 & 0.042 & 0.133 & 0.050 & 0.053 \\ ATI Radeon 5850 & 0.026 & - & 0.105 & - \\ \end{tabular} \caption{Execution times (seconds) for vector addition and inner products.} \label{tab:vectorbench} \end{center} \end{table} \section{Matrix-Vector Multiplication} We have compared execution times of the operation \begin{align} \mathbf{y} = \mathbf{A} \mathbf{x} \ , \end{align} where $\mathbf{A}$ is a sparse matrix (ten entries per column on average). 
The results in Tab.~\ref{tab:sparsebench} show that by the use of {\ViennaCL} and a mid-range GPU, performance gains of up to one order of magnitude can be obtained. \begin{table}[tb] \begin{center} \begin{tabular}{l|c|c} Compute Device & float & double \\ \hline CPU & 0.0333 & 0.0352 \\ NVIDIA GTX 260 & 0.0028 & 0.0043 \\ NVIDIA GTX 470 & 0.0024 & 0.0041 \\ ATI Radeon 5850 & 0.0032 & - \\ \end{tabular} \caption{Execution times (seconds) for sparse matrix-vector multiplication using \texttt{compressed\_matrix}.} \label{tab:sparsebench} \end{center} \end{table} \section{Iterative Solver Performance} The solution of a system of linear equations is encountered in many simulators. It is often seen as a black-box: System matrix and right hand side vector in, solution out. Thus, this black-box process makes it easy to exchange existing solvers on the CPU with a GPU variant provided by {\ViennaCL}. Tab.~\ref{tab:solverbench} shows that the performance gain of GPU implementations can be significant. For applications where most time is spent on the solution of the linear systems, the use of {\ViennaCL} can reduce the total execution time by about a factor of five. \begin{table}[tb] \begin{center} \begin{tabular}{l|c|c|c|c} Compute Device & CG, float & CG, double & GMRES, float & GMRES, double \\ \hline CPU & 0.407 & 0.450 & 4.84 & 7.58 \\ NVIDIA GTX 260 & 0.067 & 0.092 & 4.27 & 5.08 \\ NVIDIA GTX 470 & 0.063 & 0.087 & 3.63 & 4.68 \\ ATI Radeon 5850 & 0.233 & - & 22.7 & -\\ \end{tabular} \caption{Execution times (seconds) for ten iterations of CG and GMRES without preconditioner.
Results for BiCGStab are similar to that of CG.} \label{tab:solverbench} \end{center} \end{table} ViennaCL-1.5.1-src/doc/manual/shared-lib.tex000644 001750 001750 00000002215 12255634117 020544 0ustar00rupprupp000000 000000 \chapter{Shared Library} \label{chap:shared-lib} In order to open up {\ViennaCL} to other languages such as C, FORTRAN, or Python, a shared library is under development in the subfolder \lstinline|libviennacl/|. Currently the different BLAS backends for dense linear algebra are available. Sparse linear algebra, iterative solvers, etc. will follow in future releases. The design and calling conventions are very similar to vendor BLAS libraries. All functions are prefixed 'ViennaCL'. The three backends provide their functionality through functions prefixed \lstinline|ViennaCLCUDA|, \lstinline|ViennaCLOpenCL|, and \lstinline|ViennaCLHost|, respectively. Since we consider the standard BLAS interface rather tedious and error-prone, an additional object-oriented interface is provided as well. Have a look at \lstinline|examples/tutorial/libviennacl.cpp| as well as the tests located at \lstinline|tests/src/libviennacl*| to get an impression on how to use these methods. Also, all callable functions in the shared library are listed in the public include file \lstinline|libviennacl/include/viennacl.hpp|. Additional documentation will be added incrementally.ViennaCL-1.5.1-src/doc/manual/multi-device.tex000644 001750 001750 00000013440 12235435247 021124 0ustar00rupprupp000000 000000 \chapter{Configuring OpenCL Contexts and Devices} \label{chap:multi-devices} Support for multiple devices was officially added in {\OpenCL} 1.1. Among other things, this allows e.g.~to use all CPUs in a multi-socket CPU mainboard as a single {\OpenCL} compute device. Nevertheless, the efficient use of multiple {\OpenCL} devices is far from trivial, because algorithms have to be designed such that they take distributed memory and synchronization issues into account. 
Support for multiple {\OpenCL} devices and contexts was introduced in {\ViennaCL} with version 1.1.0. In the following we give a description of the provided functionality. \NOTE{In {\ViennaCLversion} there is no native support for automatically executing operations over multiple GPUs. Partition of data is left to the user.} \section{Context Setup} Unless specified otherwise (see Chap.~\ref{chap:custom-contexts}), {\ViennaCL} silently creates its own context and adds all available default devices with a single queue per device to it. All operations are then carried out on this context, which can be obtained with the call \begin{lstlisting} viennacl::ocl::current_context(); \end{lstlisting} This default context is identified by the ID $0$ (of type \lstinline|long|). {\ViennaCL} uses the first platform returned by the OpenCL backend for the context. If a different platform should be used on a machine with multiple platforms available, this can be achieved with \begin{lstlisting} viennacl::ocl::set_context_platform_index(id, platform_index); \end{lstlisting} where the context ID is \lstinline|id| and \lstinline|platform_index| refers to the array index of the platform as returned by \lstinline|clGetPlatformIDs()|. By default, only the first device in the context is used for all operations. This device can be obtained via \begin{lstlisting} viennacl::ocl::current_context().current_device(); viennacl::ocl::current_device(); //equivalent to above \end{lstlisting} A user may wish to use multiple {\OpenCL} contexts, where each context consists of a subset of the available devices. 
To setup a context with ID \lstinline|id| with a particular device type only, the user has to specify this prior to any other {\ViennaCL} related statements: \begin{lstlisting} //use only GPUs: viennacl::ocl::set_context_device_type(id, viennacl::ocl::gpu_tag()); //use only CPUs: viennacl::ocl::set_context_device_type(id, viennacl::ocl::cpu_tag()); //use only the default device type viennacl::ocl::set_context_device_type(id, viennacl::ocl::default_tag()); //use only accelerators: viennacl::ocl::set_context_device_type(id, viennacl::ocl::accelerator_tag()); \end{lstlisting} Instead of using the tag classes, the respective {\OpenCL} constants \texttt{CL\_DEVICE\_TYPE\_GPU} etc.~can be supplied as second argument. Another possibility is to query all devices from the current platform: \begin{lstlisting} std::vector< viennacl::ocl::device > devices = viennacl::ocl::platform().devices(); \end{lstlisting} and create a custom subset of devices, which is then passed to the context setup routine: \begin{lstlisting} //take the first and the third available device from 'devices' std::vector< viennacl::ocl::device > my_devices; my_devices.push_back(devices[0]); my_devices.push_back(devices[2]); //Initialize the context with ID 'id' with these devices: viennacl::ocl::setup_context(id, my_devices); \end{lstlisting} Similarly, contexts with other IDs can be set up. \TIP{For details on how to initialize {\ViennaCL} with already existing contexts, see Chapter \ref{chap:custom-contexts}.} The library user is reminded that memory objects within a context are allocated for all devices within a context. Thus, setting up contexts with one device each is optimal in terms of memory usage, because each memory object is then bound to a single device only. However, memory transfer between contexts (and thus devices) has to be done manually by the library user then. 
Moreover, the user has to keep track of the context in which the individual {\ViennaCL} objects have been created, because all operands are assumed to be in the currently active context. \section{Switching Contexts and Devices} {\ViennaCL} always uses the currently active {\OpenCL} context with the currently active device to enqueue compute kernels. The default context is identified by ID '$0$'. The context with ID \lstinline|id| can be set as active context with the line \begin{lstlisting} viennacl::ocl::switch_context(id); \end{lstlisting} Subsequent kernels are then enqueued on the active device for that particular context. Similar to setting contexts active, the active device can be set for each context. For example, to set the second device in the context as the active device, the line \begin{lstlisting} viennacl::ocl::current_context().switch_device(1); \end{lstlisting} is required. In some circumstances one may want to pass the device object directly, e.g.~to set the second device of the platform active: \begin{lstlisting} std::vector<viennacl::ocl::device> const & devices = viennacl::ocl::platform().devices(); viennacl::ocl::current_context().switch_device(devices[1]); \end{lstlisting} If the supplied device is not part of the context, an error message is printed and the active device remains unchanged. \section{Setting OpenCL Compiler Flags} Each {\OpenCL} context provides a member function \lstinline|.build_options()|, which can be used to pass OpenCL compiler flags prior to compilation. Note that flags need to be passed to the context prior to the compilation of the respective kernels, i.e.~prior to the first instantiation of the respective matrix or vector types. To pass the \lstinline|-cl-mad-enable| flag to the current context, the line \begin{lstlisting} viennacl::ocl::current_context().build_options("-cl-mad-enable"); \end{lstlisting} is sufficient. Refer to the {\OpenCL} standard for a full list of flags.
ViennaCL-1.5.1-src/doc/manual/license.tex000644 001750 001750 00000002706 12267307212 020155 0ustar00rupprupp000000 000000 \chapter{License} % \addcontentsline{toc}{chapter}{License} Copyright (c) 2010-2014 Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. Argonne National Laboratory, with facilities in the state of Illinois, is owned by The United States Government, and operated by UChicago Argonne, LLC under provision of a contract with the Department of Energy. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ViennaCL-1.5.1-src/doc/manual/additional-algorithms.tex000644 001750 001750 00000031153 12255634117 023014 0ustar00rupprupp000000 000000 \chapter{Additional Algorithms} \label{chap:additional-algorithms} The following algorithms are still not yet mature enough to be considered core-functionality, and/or are available with the {\OpenCL} backend only. 
\section{Additional Iterative Solvers} The following iterative solvers are only available on selected computing backends. \subsection{Mixed-Precision Conjugate Gradients} A two-stage mixed-precision CG algorithm is available as follows: \begin{lstlisting} viennacl::linalg::mixed_precision_cg_tag mixed_prec_cg_config; vcl_result = viennacl::linalg::solve(vcl_matrix, vcl_rhs, mixed_prec_cg_config); \end{lstlisting} As usual, the first parameter to the constructor of \lstinline|mixed_precision_cg_tag| is the relative tolerance for the residual, while the second parameter denotes the maximum number of solver iterations. The third parameter denotes the relative tolerance for the inner low-precision CG iterations and defaults to $0.01$. \TIP{Have a look at \lstinline|examples/benchmark/solver.cpp| for an example.} \NOTE{A mixed-precision solver makes sense only if the matrix and right-hand-side vector are supplied in \lstinline|double| precision.} \NOTE{The mixed-precision solver is currently available with the {\OpenCL} compute backend only.} \section{Additional Preconditioners} In addition to the preconditioners discussed in Sec.~\ref{sec:preconditioner}, two more preconditioners are available with the {\OpenCL} backend and are described in the following. \subsection{Algebraic Multigrid} \NOTE{Algebraic Multigrid preconditioners are only available with the {\OpenCL} backend and are experimental in {\ViennaCLversion}. Interface changes as well as considerable performance improvements may be included in future releases!} \NOTE{Algebraic Multigrid preconditioners depend on {\ublas}.} Algebraic multigrid mimics the behavior of geometric multigrid on the algebraic level and is thus suited for black-box purposes, where only the system matrix and the right hand side vector are available \cite{trottenberg:multigrid}. Many different flavors of the individual multigrid ingredients exist \cite{yang:parallel-amg}, of which the most common ones are implemented in {\ViennaCL}.
The two main ingredients of algebraic multigrid are a coarsening algorithm and an interpolation algorithm. The available coarsening methods are listed in Tab.~\ref{tab:amg-coarsening}. \begin{table}[tbp] \begin{center} \begin{tabular}{l|l} Description & {\ViennaCL} option constant \\ \hline Classical Ruge-St\"uben (RS) & \lstinline|VIENNACL_AMG_COARSE_RS| \\ One-Pass & \lstinline|VIENNACL_AMG_COARSE_ONEPASS| \\ RS0 & \lstinline|VIENNACL_AMG_COARSE_RS0| \\ RS3 & \lstinline|VIENNACL_AMG_COARSE_RS3| \\ Aggregation & \lstinline|VIENNACL_AMG_COARSE_AG| \\ Smoothed aggregation & \lstinline|VIENNACL_AMG_COARSE_SA| \\ \end{tabular} \caption{AMG coarsening methods available in {\ViennaCL}. Per default, classical RS coarsening is used.\label{tab:amg-coarsening}} \end{center} \end{table} The available interpolation methods are given in Tab.~\ref{tab:amg-interpolation}. \begin{table}[tbp] \begin{center} \begin{tabular}{l|l} Description & {\ViennaCL} option constant \\ \hline Direct & \lstinline|VIENNACL_AMG_INTERPOL_DIRECT| \\ Classic & \lstinline|VIENNACL_AMG_INTERPOL_ONEPASS| \\ RS0 coarsening & \lstinline|VIENNACL_AMG_INTERPOL_RS0| \\ RS3 coarsening & \lstinline|VIENNACL_AMG_INTERPOL_RS3| \\ \end{tabular} \caption{AMG interpolation methods available in {\ViennaCL}. Per default, direct interpolation is used.\label{tab:amg-interpolation}} \end{center} \end{table} In addition, the following parameters can be controlled in the \lstinline|amg_tag| and can be passed to the constructor: \begin{itemize} \item Strength of dependence threshold (default: $0.25$) \item Interpolation weight (default: $1$) \item Jacobi smoother weight (default: $1$) \item Number of pre-smoothing steps (default: $1$) \item Number of post-smoothing steps (default: $1$) \item Number of coarse levels \end{itemize} \TIP{Note that the efficiency of the various AMG flavors are typically highly problem-specific. 
Therefore, failure of one method for a particular problem does NOT imply that other coarsening or interpolation strategies will fail as well.} \subsection{Sparse Approximate Inverses} \NOTE{Sparse Approximate Inverse preconditioners are only available with the {\OpenCL} backend and are experimental in {\ViennaCLversion}. Interface changes as well as considerable performance improvements may be included in future releases!} \NOTE{Sparse Approximate Inverse preconditioners depend on {\ublas}.} An alternative construction of a preconditioner for a sparse system matrix $A$ is to compute a matrix $M$ with a prescribed sparsity pattern such that \begin{align} \Vert AM - I \Vert_F \rightarrow \min \ , \end{align} where $\Vert \cdot \Vert_F$ denotes the Frobenius norm. This is the basic idea of sparse approximate inverse (SPAI) preconditioners. They become increasingly attractive because of their inherent high degree of parallelism, since the minimization problem can be solved independently for each column of $M$. {\ViennaCL} provides two preconditioners of this family: The first is the classical SPAI algorithm as described by Grote and Huckle \cite{grote:spai}, the second is the factored SPAI (FSPAI) for symmetric matrices as proposed by Huckle \cite{huckle:fspai}. SPAI can be employed for a CPU matrix \lstinline|M| of type \lstinline|MatrixType| as follows: \begin{lstlisting} // setup SPAI preconditioner, purely CPU-based viennacl::linalg::spai_precond<MatrixType> spai_cpu(M, viennacl::linalg::spai_tag(1e-3, 3, 5e-2)); //solve (e.g. using stab. Bi-conjugate gradient solver) vcl_result = viennacl::linalg::solve(M, rhs, viennacl::linalg::bicgstab_tag(), spai_cpu); \end{lstlisting} The first parameter denotes the residual norm threshold for the full matrix, the second parameter the maximum number of pattern updates, and the third parameter is the threshold for the residual of each minimization problem.
For GPU-matrices, only parts of the setup phase are computed on the CPU, because compute-intensive tasks can be carried out on the GPU: \begin{lstlisting} // setup SPAI preconditioner, GPU-assisted viennacl::linalg::spai_precond<GPUMatrixType> spai_gpu(vcl_matrix, viennacl::linalg::spai_tag(1e-3, 3, 5e-2)); //solve (e.g. using stab. Bi-conjugate gradient solver) vcl_result = viennacl::linalg::solve(vcl_matrix, vcl_rhs, viennacl::linalg::bicgstab_tag(), spai_gpu); \end{lstlisting} The \lstinline|GPUMatrixType| is typically a \lstinline|viennacl::compressed_matrix| type. For symmetric matrices, FSPAI can be used with the conjugate gradient solver: \begin{lstlisting} viennacl::linalg::fspai_precond<MatrixType> fspai_cpu(M, viennacl::linalg::fspai_tag()); //solve (e.g. using conjugate gradient solver) vcl_result = viennacl::linalg::solve(M, rhs, viennacl::linalg::cg_tag(), fspai_cpu); \end{lstlisting} Our experience is that FSPAI is typically more efficient than SPAI when applied to the same matrix, both in computational effort and in terms of convergence acceleration of the iterative solvers. \NOTE{At present, there is no GPU-accelerated FSPAI included in {\ViennaCL}.} Note that FSPAI depends on the ordering of the unknowns, thus bandwidth reduction algorithms may be employed first, cf.~Sec.~\ref{sec:bandwidth-reduction}. \section{Fast Fourier Transform} \NOTE{The fast Fourier transform is experimental in {\ViennaCLversion} and available with the {\OpenCL} backend only. Interface changes as well as considerable performance improvements may be included in future releases!} Since there is no standardized complex type in {\OpenCL} at the time of the release of {\ViennaCLversion}, vectors need to be set up with real and imaginary parts before computing a fast Fourier transform (FFT). 
In order to store complex numbers $z_0$, $z_1$, etc.~in a \lstinline|viennacl::vector|, say \lstinline|v|, the real and imaginary parts are mapped to even and odd entries of \lstinline|v| respectively: \lstinline|v[0] = Real(z_0)|, \lstinline|v[1] = Imag(z_0)|, \lstinline|v[2] = Real(z_1)|, \lstinline|v[3] = Imag(z_1)|, etc. The FFT of \lstinline|v| can then be computed either by writing to a second vector \lstinline|output| or by directly writing the result to \lstinline|v| \begin{lstlisting} viennacl::fft(v, output); viennacl::inplace_fft(v); \end{lstlisting} Conversely, the inverse FFT is computed as \begin{lstlisting} viennacl::ifft(v, output); viennacl::inplace_ifft(v); \end{lstlisting} \NOTE{In {\ViennaCLversion} the FFT with complexity $N \log N$ is computed for vectors with a size of a power of two only. For other vector sizes, a standard discrete Fourier transform with complexity $N^2$ is employed. This is subject to change in future versions.} \section{Bandwidth Reduction} \label{sec:bandwidth-reduction} \NOTE{Bandwidth reduction algorithms are experimental in {\ViennaCLversion}. Interface changes as well as considerable performance improvements may be included in future releases!} The bandwidth of a sparse matrix is defined as the maximum difference of the indices of nonzero entries in a row, taken over all rows. A low bandwidth typically allows for the use of efficient banded matrix solvers instead of iterative solvers. Moreover, better cache utilization as well as lower fill-in in LU-factorization based algorithms can be expected. For a given sparse matrix with large bandwidth, {\ViennaCL} provides routines for renumbering the unknowns such that the reordered system matrix shows much smaller bandwidth. Typical applications stem from the discretization of partial differential equations by means of the finite element or the finite difference method. 
The algorithms employed are as follows: \begin{itemize} \item Classical Cuthill-McKee algorithm \cite{cuthill:reducing-bandwidth} \item Iterated Cuthill-McKee algorithm with different seeds \cite{cuthill:reducing-bandwidth} \item Gibbs-Poole-Stockmeyer algorithm, cf.~\cite{lewis:gps-algorithm} \end{itemize} The iterated Cuthill-McKee algorithm applies the classical Cuthill-McKee algorithm to different starting nodes, taking nodes with small, but not necessarily minimal, degree into account as root nodes. While this iterated application is more expensive in terms of execution time, it may lead to better results than the classical Cuthill-McKee algorithm. A parameter $a \in [0,1]$ controls the number of nodes considered: All nodes with degree $d$ fulfilling \begin{align*} d_{\min} \leq d \leq d_{\min} + a(d_{\max} - d_{\min}) \end{align*} are considered, where $d_{\min}$ and $d_{\max}$ are the minimum and maximum nodal degrees in the graph. A second parameter \lstinline|gmax| specifies the number of additional root nodes considered. The algorithms are called for a \lstinline|matrix| of a type compatible with \lstinline|std::vector< std::map<int, double> >| by \begin{lstlisting} r = viennacl::reorder(matrix, viennacl::cuthill_mckee_tag()); r = viennacl::reorder(matrix, viennacl::advanced_cuthill_mckee_tag(a, gmax)); r = viennacl::reorder(matrix, viennacl::gibbs_poole_stockmeyer_tag()); \end{lstlisting} and return the permutation array. In {\ViennaCLversion}, the user then needs to manually reorder the sparse matrix based on the permutation array. Example code can be found in \lstinline|examples/tutorial/bandwidth-reduction.cpp|. \section{Nonnegative Matrix Factorization} \NOTE{Nonnegative Matrix Factorization is experimental in {\ViennaCLversion} and available with the {\OpenCL} backend only. 
Interface changes as well as considerable performance improvements may be included in future releases!} In various fields such as text mining, a matrix $V$ needs to be factored into factors $W$ and $H$ such that the function \begin{align*} f(W, H) = \Vert V - WH \Vert_{\mathrm{F}}^2 \end{align*} is minimized. The algorithm proposed by Lee and Seoung \cite{lee:nmf} is available in ViennaCL in the header file \texttt{viennacl/linalg/nmf.hpp} as \begin{lstlisting} viennacl::matrix V(size1, size2); viennacl::matrix W(size1, k); viennacl::matrix H(k, size2); viennacl::linalg::nmf_config conf; viennacl::linalg::nmf(v_ref, w_nmf, h_nmf, conf); \end{lstlisting} For an overview of the parameters (tolerances) of the configuration object \lstinline|conf|, please refer to the Doxygen documentation in \texttt{doc/doxygen/}. ViennaCL-1.5.1-src/doc/manual/design.tex000644 001750 001750 00000015455 12235435247 020016 0ustar00rupprupp000000 000000 \chapter{Design Decisions} During the implementation of {\ViennaCL}, several design decisions have been necessary, which are often a trade-off among various advantages and disadvantages. In the following, we discuss several design decisions and their alternatives. \section{Transfer CPU-GPU-CPU for Scalars} The {\ViennaCL} scalar type \lstinline|scalar<>| essentially behaves like a CPU scalar in order to make any access to GPU ressources as simple as possible, for example \begin{lstlisting} float cpu_float = 1.0f; viennacl::linalg::scalar gpu_float = cpu_float; gpu_float = gpu_float * gpu_float; gpu_float -= cpu_float; cpu_float = gpu_float; \end{lstlisting} As an alternative, the user could have been required to use \lstinline|copy| as for the vector and matrix classes, but this would unnecessarily complicate many commonly used operations like \begin{lstlisting} if (norm_2(gpu_vector) < 1e-10) { ... 
} \end{lstlisting} or \begin{lstlisting} gpu_vector[0] = 2.0f; \end{lstlisting} where one of the operands resides on the CPU and the other on the GPU. Initialization of a separate type followed by a call to \lstinline|copy| is certainly not desired for the above examples. However, one should use \lstinline|scalar<>| with care, because the overhead for transfers from CPU to GPU and vice versa is very large for the simple \lstinline|scalar<>| type. \NOTE{Use \lstinline|scalar<>| with care, it is much slower than built-in types on the CPU!} \section{Transfer CPU-GPU-CPU for Vectors} The present way of data transfer for vectors and matrices from CPU to GPU to CPU is to use the provided \lstinline|copy| function, which is similar to its counterpart in the Standard Template Library (STL): \begin{lstlisting} std::vector<float> cpu_vector(10); ViennaCL::LinAlg::vector<float> gpu_vector(10); /* fill cpu_vector here */ //transfer values to gpu: copy(cpu_vector.begin(), cpu_vector.end(), gpu_vector.begin()); /* compute something on GPU here */ //transfer back to cpu: copy(gpu_vector.begin(), gpu_vector.end(), cpu_vector.begin()); \end{lstlisting} A first alternative approach would have been to overload the assignment operator like this: \begin{lstlisting} //transfer values to gpu: gpu_vector = cpu_vector; /* compute something on GPU here */ //transfer back to cpu: cpu_vector = gpu_vector; \end{lstlisting} The first overload can be directly applied to the \lstinline|vector|-class provided by \ViennaCL. However, the question of accessing data in the \lstinline|cpu_vector| object arises. For \lstinline|std::vector| and C arrays, the bracket operator can be used, but the parenthesis operator cannot. However, other vector types may not provide a bracket operator. Using STL iterators is thus the more reliable variant. The transfer from GPU to CPU would require overloading the assignment operator for the CPU class, which cannot be done by {\ViennaCL}. 
Thus, the only possibility within {\ViennaCL} is to provide conversion operators. Since many different libraries could be used in principle, the only possibility is to provide conversion of the form \begin{lstlisting} template <typename T> operator T() {/* implementation here */} \end{lstlisting} for the types in {\ViennaCL}. However, this would allow even totally meaningless conversions, e.g.~from a GPU vector to a CPU boolean and may result in obscure unexpected behavior. Moreover, with the use of \texttt{copy} functions it is much clearer at which point in the source code large amounts of data are transferred between CPU and GPU. \section{Solver Interface} We decided to provide an interface compatible with {\ublas} for dense matrix operations. The only possible generalization for iterative solvers was to use the tagging facility for the specification of the desired iterative solver. \section{Iterators} Since we use the iterator-driven \lstinline|copy| function for transfer from CPU to GPU to CPU, iterators have to be provided anyway. However, it has to be repeated that they are usually VERY slow, because each data access (i.e.~dereferencing) implies a new transfer between CPU and GPU. Nevertheless, CPU-cached vector and matrix classes could be introduced in future releases of {\ViennaCL}. A remedy for quick iteration over the entries of e.g.~a vector is the following: \begin{lstlisting} std::vector<float> temp(gpu_vector.size()); copy(gpu_vector.begin(), gpu_vector.end(), temp.begin()); for (std::vector<float>::iterator it = temp.begin(); it != temp.end(); ++it) { //do something with the data here } copy(temp.begin(), temp.end(), gpu_vector.begin()); \end{lstlisting} The three extra code lines can be wrapped into a separate iterator class by the library user, who also has to ensure data consistency during the loop. 
\section{Initialization of Compute Kernels} Since {\OpenCL} relies on passing the {\OpenCL} source code to a built-in just-in-time compiler at run time, the necessary kernels have to be generated every time an application using {\ViennaCL} is started. One possibility was to require a mandatory \begin{lstlisting} viennacl::init(); \end{lstlisting} before using any other objects provided by {\ViennaCL}, but this approach was discarded for the following two reasons: \begin{itemize} \item If \lstinline|viennacl::init();| is accidentally forgotten by the user, the program will most likely terminate in a rather uncontrolled way. \item It requires the user to remember and write one extra line of code, even if the default settings are fine. \end{itemize} Initialization is instead done in a lazy manner when requesting {\OpenCL} kernels. Kernels with similar functionality are grouped together in a common compilation unit. This allows a fine-grained control over which source code to compile where and when. For example, there is no reason to compile the sparse matrix compute kernels at program startup if there are no sparse matrices used at all. Moreover, the just-in-time compilation of all available compute kernels in {\ViennaCL} takes several seconds. Therefore, a request-based compilation is used to minimize any overhead due to just-in-time compilation. The request-based compilation is a two-step process: At the first instantiation of an object of a particular type from {\ViennaCL}, the full source code for all objects of the same type is compiled into an {\OpenCL} program for that type. Each program contains plenty of compute kernels, which are not yet initialized. Only when an argument for a compute kernel is set does the kernel actually take care of its own initialization. Any subsequent calls of that kernel reuse the already compiled and initialized compute kernel. 
\NOTE{When benchmarking {\ViennaCL}, first a dummy call to the functionality of interest should be issued prior to taking timings. Otherwise, benchmark results include the just-in-time compilation, which is a constant independent of the data size.} ViennaCL-1.5.1-src/doc/manual/changelogs.tex000644 001750 001750 00000065521 12267305524 020655 0ustar00rupprupp000000 000000 \chapter{Change Logs} %\addcontentsline{toc}{chapter}{Change Logs} \section*{Version 1.5.x} \subsection*{Version 1.5.1} This maintenance release fixes a few nasty bugs: \begin{itemize} \item Fixed a memory leak in the OpenCL kernel generator. Thanks to GitHub user dxyzab for spotting this. \item Added compatibility of the mixed precision CG implementation with older AMD GPUs. Thanks to Andreas Rost for the input. \item Fixed an error when running the QR factorization for matrices with less rows than columns. Thanks to Karol Polko for reporting. \item Readded accidentally removed chapters on additional algorithms and structured matrices to the manual. Thanks to Sajjadul Islam for the hint. \item Fixed buggy OpenCL kernels for matrix additions and subtractions for column-major matrices. Thanks to Tom Nicholson for reporting. \item Fixed an invalid default kernel parameter set for matrix-matrix multiplications on CPUs when using the OpenCL backend. Thanks again to Tom Nicholson. \item Corrected a weak check used in two tests. Thanks to Walter Mascarenhas for providing a fix. \item Fixed a wrong global work size inside the SPAI preconditioner. Thanks to Andreas Rost. \end{itemize} \subsection*{Version 1.5.0} This new minor release number update focuses on a more powerful API, and on first steps in making ViennaCL more accessible from languages other than C++. In addition to many internal improvements both in terms of performance and flexibility, the following changes are visible to users: \begin{itemize} \item API-change: User-provided OpenCL kernels extract their kernels automatically. 
A call to \lstinline|add_kernel()| is now obsolete, hence the function was removed. \item API-change: Device class has been extended and supports all information defined in the OpenCL 1.1 standard through member functions. Duplicate \lstinline|compute_units()| and \lstinline|max_work_group_size()| have been removed (thanks to Shantanu Agarwal for the input). \item API-change: \lstinline|viennacl::copy()| from a ViennaCL object to an object of non-ViennaCL type no longer tries to resize the object accordingly. An assertion is thrown if the sizes are incorrect in order to provide a consistent behavior across many different types. \item Datastructure change: Vectors and matrices are now padded with zeros by default, resulting in higher performance particularly for matrix operations. This padding needs to be taken into account when using \lstinline|fast_copy()|, particularly for matrices. \item Fixed problems with CUDA and CMake+CUDA on Visual Studio. \item \lstinline|coordinate_matrix<>| now also behaves correctly for tiny matrix dimensions. \item CMake 2.6 as new minimum requirement instead of CMake 2.8. \item Vectors and matrices can be instantiated with integer template types (long, int, short, char). \item Added support for \lstinline|element_prod()| and \lstinline|element_div()| for dense matrices. \item Added \lstinline|element_pow()| for vectors and matrices. \item Added \lstinline|norm_frobenius()| for computing the Frobenius norm of dense matrices. \item Added unary element-wise operations for vectors and dense matrices: \lstinline|element_sin()|, \lstinline|element_sqrt()|, etc. \item Multiple OpenCL contexts can now be used in a multi-threaded setting (one thread per context). 
\item Multiple inner products with a common vector can now be computed efficiently via e.g.~\lstinline|inner_prod(x, tie(y, z));| \item Added support for \lstinline|prod(A, B)|, where \lstinline|A| is a sparse matrix type and \lstinline|B| is a dense matrix (thanks to Albert Zaharovits for providing parts of the implementation). \item Added \lstinline|diag()| function for extracting the diagonal of a matrix to a vector, or for generating a square matrix from a vector with the vector elements on a diagonal (similar to MATLAB). \item Added \lstinline|row()| and \lstinline|column()| functions for extracting a certain row or column of a matrix to a vector. \item Sparse matrix-vector products now also work with vector strides and ranges. \item Added \lstinline|async_copy()| for vectors to allow for a better overlap of computation and communication. \item Added \lstinline|compressed_compressed_matrix| type for the efficient representation of CSR matrices with only few nonzero rows. \item Added possibility to switch command queues in OpenCL contexts. \item Improved performance of Block-ILU by removing one spurious conversion step. \item Improved performance of Cuthill-McKee algorithm by about 40 percent. \item Improved performance of power iteration by avoiding the creation of temporaries in each step. \item Removed spurious status message to cout in matrix market reader and nonnegative matrix factorization. \item The OpenCL kernel launch logic no longer attempts to re-launch the kernel with smaller work sizes if an error is encountered (thanks to Peter Burka for pointing this out). \item Reduced overhead for lengthy expressions involving temporaries (at the cost of increased compilation times). \item \lstinline|vector| and \lstinline|matrix| are now padded to dimensions being multiples of 128 per default. This greatly improves GEMM performance for arbitrary sizes. 
\item Loop indices for OpenMP parallelization are now all signed, increasing compatibility with older OpenMP implementations (thanks to Mrinal Deo for the hint). \item Complete rewrite of the generator. Now uses the scheduler for specifying the operation. Includes a full device database for portable high performance of GEMM kernels. \item Added micro-scheduler for attaching the OpenCL kernel generator to the user API. \item Certain BLAS functionality in ViennaCL is now also available through a shared library (libviennacl). \item Removed the external kernel parameter tuning facility, which is to be replaced by an internal device database through the kernel generator. \item Completely eliminated the OpenCL kernel conversion step in the developer repository and the source-release. One can now use the developer version without the need for a Boost installation. \end{itemize} \section*{Version 1.4.x} \subsection*{Version 1.4.2} This is a maintenance release, particularly resolving compilation problems with Visual Studio 2012. \begin{itemize} \item Largely refactored the internal code base, unifying code for \lstinline|vector|, \lstinline|vector_range|, and \lstinline|vector_slice|. Similar code refactoring was applied to \lstinline|matrix|, \lstinline|matrix_range|, and \lstinline|matrix_slice|. This not only resolves the problems in VS 2012, but also leads to shorter compilation times and a smaller code base. \item Improved performance of matrix-vector products of \lstinline|compressed_matrix| on CPUs using OpenCL. \item Resolved a bug which shows up if certain rows and columns of a \lstinline|compressed_matrix| are empty and the matrix is copied back to host. \item Fixed a bug and improved performance of GMRES. Thanks to Ivan Komarov for reporting via sourceforge. \item Added additional Doxygen documentation. 
\end{itemize} \subsection*{Version 1.4.1} This release focuses on improved stability and performance on AMD devices rather than introducing new features: \begin{itemize} \item Included fast matrix-matrix multiplication kernel for AMD's Tahiti GPUs if matrix dimensions are a multiple of 128. Our sample HD7970 reaches over 1.3 TFLOPs in single precision and 200 GFLOPs in double precision (counting multiplications and additions as separate operations). \item All benchmark FLOPs are now using the common convention of counting multiplications and additions separately (ignoring fused multiply-add). \item Fixed a bug for matrix-matrix multiplication with \lstinline|matrix_slice<>| when slice dimensions are multiples of 64. \item Improved detection logic for Intel OpenCL SDK. \item Fixed issues when resizing an empty \lstinline|compressed_matrix|. \item Fixes and improved support for BLAS-1-type operations on dense matrices and vectors. \item Vector expressions can now be passed to \lstinline|inner_prod()| and \lstinline|norm_1()|, \lstinline|norm_2()| and \lstinline|norm_inf()| directly. \item Improved performance when using OpenMP. \item Better support for Intel Xeon Phi (MIC). \item Resolved problems when using OpenCL for CPUs if the number of cores is not a power of 2. \item Fixed a flaw when using AMG in debug mode. Thanks to Jakub Pola for reporting. \item Removed accidental external linkage (invalidating header-only model) of SPAI-related functions. Thanks again to Jakub Pola. \item Fixed issues with copy back to host when OpenCL handles are passed to CTORs of vector, matrix, or \lstinline|compressed_matrix|. Thanks again to Jakub Pola. \item Added fix for segfaults on program exit when providing custom OpenCL queues. Thanks to Denis Demidov for reporting. \item Fixed bug in \lstinline|copy()| to \lstinline|hyb_matrix| as reported by Denis Demidov (thanks!). \item Added an overload for \lstinline|result_of::alignment| for \lstinline|vector_expression|. 
Thanks again to Denis Demidov. \item Added SSE-enabled code contributed by Alex Christensen. \end{itemize} \subsection*{Version 1.4.0} The transition from 1.3.x to 1.4.x features the largest number of additions, improvements, and cleanups since the initial release. In particular, host-, OpenCL-, and CUDA-based execution is now supported. OpenCL now needs to be enabled explicitly! New features and feature improvements are as follows: \begin{itemize} \item Added host-based and CUDA-enabled operations on ViennaCL objects. The default is now a host-based execution for reasons of compatibility. Enable OpenCL- or CUDA-based execution by defining the preprocessor constant \lstinline|VIENNACL_WITH_OPENCL| and \lstinline|VIENNACL_WITH_CUDA| respectively. Note that CUDA-based execution requires the use of nvcc. \item Added mixed-precision CG solver (OpenCL-based). \item Greatly improved performance of ILU0 and ILUT preconditioners (up to 10-fold). Also fixed a bug in ILUT. \item Added initializer types from Boost.uBLAS (\lstinline|unit_vector|, \lstinline|zero_vector|, \lstinline|scalar_vector|, \lstinline|identity_matrix|, \lstinline|zero_matrix|, \lstinline|scalar_matrix|). Thanks to Karsten Ahnert for suggesting the feature. \item Added incomplete Cholesky factorization preconditioner. \item Added element-wise operations for vectors as available in Boost.uBLAS (\lstinline|element_prod|, \lstinline|element_div|). \item Added restart-after-N-cycles option to BiCGStab. \item Added level-scheduling for ILU-preconditioners. Performance strongly depends on matrix pattern. \item Added least-squares example including a function \lstinline|inplace_qr_apply_trans_Q()| to compute the right hand side vector $Q^T b$ without rebuilding $Q$. \item Improved performance of LU-factorization of dense matrices. \item Improved dense matrix-vector multiplication performance (thanks to Philippe Tillet). \item Reduced overhead when copying to/from \lstinline|ublas::compressed_matrix|. 
\item ViennaCL objects (scalar, vector, etc.) can now be used as global variables (thanks to an anonymous user on the support-mailinglist). \item Refurbished OpenCL vector kernels backend. All operations of the type v1 = a v2 @ b v3 with vectors v1, v2, v3 and scalars a and b including += and -= instead of = are now temporary-free. Similarly for matrices. \item \lstinline|matrix_range| and \lstinline|matrix_slice| as well as \lstinline|vector_range| and \lstinline|vector_slice| can now be used and mixed completely seamlessly with all standard operations except \lstinline|lu_factorize()|. \item Fixed a bug when using copy() with iterators on vector proxy objects. \item Final reduction step in \lstinline|inner_prod()| and norms is now computed on CPU if the result is a CPU scalar. \item Reduced kernel launch overhead of simple vector kernels by packing multiple kernel arguments together. \item Updated SVD code and added routines for the computation of symmetric eigenvalues using OpenCL. \item \lstinline|custom_operation|'s constructor now supports multiple arguments, allowing multiple expressions to be packed in the same kernel for improved performance. However, all the data structures in the multiple operations must have the same size. \item Further improvements to the OpenCL kernel generator: Added a repeat feature for generating loops inside a kernel, added element-wise products and division, added support for every one-argument OpenCL function. \item The name of the operation is now a mandatory argument of the constructor of \lstinline|custom_operation|. \item Improved performance of the generated matrix-vector product code. \item Updated interfacing code for the Eigen library, now working with Eigen 3.x.y. \item Converter in source-release now depends on Boost.filesystem3 instead of Boost.filesystem2, thus requiring Boost 1.44 or above. 
\end{itemize} \section*{Version 1.3.x} \subsection*{Version 1.3.1} The following bugfixes and enhancements have been applied: \begin{itemize} \item Fixed a compilation problem with GCC 4.7 caused by the wrong order of function declarations. Also removed unnecessary indirections and unused variables. \item Improved out-of-source build in the src-version (for packagers). \item Added virtual destructor in the \lstinline|runtime_wrapper|-class in the kernel generator. \item Extended flexibility of submatrix and subvector proxies (ranges, slices). \item Block-ILU for \lstinline|compressed_matrix| is now applied on the GPU during the solver cycle phase. However, for the moment the implementation file in \newline \texttt{viennacl/linalg/detail/ilu/opencl\_block\_ilu.hpp} needs to be included separately in order to avoid an OpenCL dependency for all ILU implementations. \item SVD now supports double precision. \item Slightly adjusted the interface for NMF. The approximation rank is now specified by the supplied matrices $W$ and $H$. \item Fixed a problem with matrix-matrix products if the result matrix is not initialized properly (thanks to Laszlo Marak for finding the issue and a fix). \item The operations $C += prod(A, B)$ and $C -= prod(A, B)$ for matrices A, B, and C no longer introduce temporaries if the three matrices are distinct. \end{itemize} \subsection*{Version 1.3.0} Several new features enter this new minor version release. 
Some of the experimental features introduced in 1.2.0 keep their experimental state in 1.3.x due to the short time since 1.2.0, with exceptions listed below along with the new features: \begin{itemize} \item Full support for ranges and slices for dense matrices and vectors (no longer experimental) \item QR factorization now possible for arbitrary matrix sizes (no longer experimental) \item Further improved matrix-matrix multiplication performance for matrix dimensions which are a multiple of 64 (particularly improves performance for NVIDIA GPUs) \item Added Lanczos and power iteration method for eigenvalue computations of dense and sparse matrices (experimental, contributed by G\"unther Mader and Astrid Rupp) \item Added singular value decomposition in single precision (experimental, contributed by Volodymyr Kysenko) \item Two new ILU-preconditioners added: ILU0 (contributed by Evan Bollig) and a block-diagonal ILU preconditioner using either ILUT or ILU0 for each block. Both preconditioners are computed entirely on the CPU. \item Automated OpenCL kernel generator based on high-level operation specifications added (many thanks to Philippe Tillet who had a lot of \emph{fun fun fun} working on this) \item Two new sparse matrix types (by Volodymyr Kysenko): \lstinline|ell_matrix| for the ELL format and \lstinline|hyb_matrix| for a hybrid format (contributed by Volodymyr Kysenko). \item Added possibility to specify the OpenCL platform used by a context \item Build options for the OpenCL compiler can now be supplied to a context (thanks to Krzysztof Bzowski for the suggestion) \item Added nonnegative matrix factorization by Lee and Seoung (contributed by Volodymyr Kysenko). \end{itemize} \section*{Version 1.2.x} \subsection*{Version 1.2.1} The current release mostly provides a few bug fixes for experimental features introduced in 1.2.0. In addition, performance improvements for matrix-matrix multiplications are applied. 
The main changes (in addition to some internal adjustments) are as follows: \begin{itemize} \item Fixed problems with double precision on AMD GPUs supporting \lstinline|cl_amd_fp64| instead of \lstinline|cl_khr_fp64| (thanks to Sylvain R.) \item Considerable improvements in the handling of \lstinline|matrix_range|. Added project() function for convenience (cf. Boost.uBLAS) \item Further improvements of matrix-matrix multiplication performance (contributed by Volodymyr Kysenko) \item Improved performance of QR factorization \item Added direct element access to elements of \lstinline|compressed_matrix| using \lstinline|operator()| (thanks to sourceforge.net user Sulif for the hint) \item Fixed incorrect matrix dimensions obtained with the transfer of non-square sparse Eigen and MTL matrices to ViennaCL objects (thanks to sourceforge.net user ggrocca for pointing at this) \end{itemize} \subsection*{Version 1.2.0} Many new features from the Google Summer of Code and the I$\mu$E Summer of Code enter this release. Due to their complexity, they are for the moment still in \textit{experimental} state (see the respective chapters for details) and are expected to reach maturity with the 1.3.0 release. Shorter release cycles are planned for the near future. 
\begin{itemize} \item Added a bunch of algebraic multigrid preconditioner variants (contributed by Markus Wagner) \item Added (factored) sparse approximate inverse preconditioner (SPAI, contributed by Nikolay Lukash) \item Added fast Fourier transform (FFT) for vector sizes with a power of two, standard Fourier transform for other sizes (contributed by Volodymyr Kysenko) \item Additional structured matrix classes for circulant matrices, Hankel matrices, Toeplitz matrices and Vandermonde matrices (contributed by Volodymyr Kysenko) \item Added reordering algorithms (Cuthill-McKee and Gibbs-Poole-Stockmeyer, contributed by Philipp Grabenweger) \item Refurbished CMake build system (thanks to Michael Wild) \item Added matrix and vector proxy objects for submatrix and subvector manipulation \item Added (possibly GPU-assisted) QR factorization \item Per default, a \lstinline|viennacl::ocl::context| now consists of one device only. The rationale is to provide better out-of-the-box support for machines with hybrid graphics (two GPUs), where one GPU may not be capable of double precision support. \item Fixed problems with \lstinline|viennacl::compressed_matrix| which occurred if the number of rows and columns differed \item Improved documentation for the case of multiple custom kernels within a program \item Improved matrix-matrix multiplication kernels (may lead to up to 20 percent performance gains) \item Fixed problems in GMRES for small matrices (dimensions smaller than the maximum number of Krylov vectors) \end{itemize} \section*{Version 1.1.x} \subsection*{Version 1.1.2} This final release of the {\ViennaCL} 1.1.x family focuses on refurbishing existing functionality: \begin{itemize} \item Fixed a bug with partial vector copies from CPU to GPU (thanks to sourceforge.net user kaiwen). \item Corrected error estimations in CG and BiCGStab iterative solvers (thanks to Riccardo Rossi for the hint). 
\item Improved performance of CG and BiCGStab as well as Jacobi and row-scaling preconditioners considerably (thanks to Farshid Mossaiby and Riccardo Rossi for a lot of input). \item Corrected linker statements in CMakeLists.txt for MacOS (thanks to Eric Christiansen). \item Improved handling of {\ViennaCL} types (direct construction, output streaming of matrix- and vector-expressions, etc.). \item Updated old code in the \texttt{coordinate\_matrix} type and improved performance (thanks to Dongdong Li for finding this). \item Using \lstinline|size_t| instead of \lstinline|unsigned int| for the size type on the host. \item Updated double precision support detection for AMD hardware. \item Fixed a name clash in direct\_solve.hpp and ilu.hpp (thanks to sourceforge.net user random). \item Prevented unsupported assignments and copies of sparse matrix types (thanks to sourceforge.net user kszyh). \end{itemize} \subsection*{Version 1.1.1} This new revision release has a focus on better interaction with other linear algebra libraries. The few known glitches with version 1.1.0 are now removed. \begin{itemize} \item Fixed compilation problems on MacOS X and {\OpenCL} 1.0 header files due to an undefined preprocessor constant (thanks to Vlad-Andrei Lazar and Evan Bollig for reporting this) \item Removed the accidental external linkage for three functions (we appreciate the report by Gordon Stevenson). \item New out-of-the-box support for {\Eigen} \cite{eigen} and {\MTL} \cite{mtl4} libraries. Iterative solvers from ViennaCL can now directly be used with both libraries. \item Fixed a problem with GMRES when the system matrix is smaller than the maximum Krylov space dimension. \item Better default parameter for BLAS3 routines leads to higher performance for matrix-matrix-products. \item Added benchmark for dense matrix-matrix products (BLAS3 routines). \item Added viennacl-info example that displays information about the {\OpenCL} backend used by {\ViennaCL}. 
\item Cleaned up CMakeLists.txt in order to selectively enable builds that rely on external libraries. \item More than one installed {\OpenCL} platform is now allowed (thanks to Aditya Patel). \end{itemize} \subsection*{Version 1.1.0} A large number of new features and improvements over the 1.0.5 release are now available: \begin{itemize} \item The completely rewritten {\OpenCL} back-end allows for multiple contexts, multiple devices and even to wrap existing OpenCL resources into ViennaCL objects. A tutorial demonstrates the new functionality. Thanks to Josip Basic for pushing us into that direction. \item The tutorials are now named according to their purpose. \item The dense matrix type now supports both row-major and column-major storage. \item Dense and sparse matrix types can now be filled using STL-emulated types (\lstinline|std::vector< std::vector<NumericT> >| and \lstinline|std::vector< std::map< unsigned int, NumericT> >|) \item BLAS level 3 functionality is now complete. We are very happy with the general out-of-the-box performance of matrix-matrix-products, even though it cannot beat the extremely tuned implementations tailored to certain matrix sizes on a particular device yet. \item An automated performance tuning environment allows an optimization of the kernel parameters for the library user's machine. Best parameters can be obtained from a tuning run and stored in an XML file and read at program startup using pugixml. \item Two new preconditioners are now included: A Jacobi preconditioner and a row-scaling preconditioner. In contrast to ILUT, they are applied on the OpenCL device directly. \item Clean compilation of all examples under Visual Studio 2005 (we recommend newer compilers though...). \item Error handling is now carried out using C++ exceptions. \item Matrix Market now uses index base 1 per default (thanks to Evan Bollig for reporting that) \item Improved performance of norm\_X kernels. 
\item Iterative solver tags now have consistent constructors: First argument is the relative tolerance, second argument is the maximum number of total iterations. Other arguments depend on the respective solver. \item A few minor improvements here and there (thanks go to Riccardo Rossi and anonymous sourceforge.net users for reporting the issues) \end{itemize} \section*{Version 1.0.x} \subsection*{Version 1.0.5} This is the last 1.0.x release. The main changes are as follows: \begin{itemize} \item Added a reader and writer for MatrixMarket files (thanks to Evan Bollig for suggesting that) \item Eliminated a bug that caused the upper triangular direct solver to fail on NVIDIA hardware for large matrices (thanks to Andrew Melfi for finding that) \item The number of iterations and the final estimated error can now be obtained from iterative solver tags. \item Improvements provided by Klaus Schnass are included in the developer converter script (OpenCL kernels to C++ header) \item Disabled the use of reference counting for OpenCL handles on Mac OS X (caused seg faults on program exit) \end{itemize} \subsection*{Version 1.0.4} The changes in this release are: \begin{itemize} \item All tutorials now work out-of the box with Visual Studio 2008. \item Eliminated all {\ViennaCL} related warnings when compiling with Visual Studio 2008. \item Better (experimental) support for double precision on ATI GPUs, but no \texttt{norm\_1}, \texttt{norm\_2}, \texttt{norm\_inf} and \texttt{index\_norm\_inf} functions using ATI Stream SDK on GPUs in double precision. \item Fixed a bug in GMRES that caused segmentation faults under Windows. 
\item Fixed a bug in \texttt{const\_sparse\_matrix\_adapter} (thanks to Abhinav Golas and Nico Galoppo for almost simultaneous emails on that) \item Corrected incorrect return values in the sparse matrix regression test suite (thanks to Klaus Schnass for the hint) \end{itemize} \subsection*{Version 1.0.3} The main improvements in this release are: \begin{itemize} \item Support for multi-core CPUs with ATI Stream SDK (thanks to Riccardo Rossi, UPC. BARCELONA TECH, for suggesting this) \item \lstinline|inner_prod| is now up to a factor of four faster (thanks to Serban Georgescu, ETH, for pointing the poor performance of the old implementation out) \item Fixed a bug with \lstinline|plane_rotation| that caused system freezes with ATI GPUs. \item Extended the doxygen generated reference documentation \end{itemize} \subsection*{Version 1.0.2} A bug-fix release that resolves some problems with the Visual C++ compiler. \begin{itemize} \item Fixed some compilation problems under Visual C++ (version 2005 and 2008). \item All tutorials accidentally relied on {\ublas}. Now \texttt{tut1} and \texttt{tut5} can be compiled without {\ublas} \item Renamed \texttt{aux/} folder to \texttt{auxiliary/} (caused some problems on windows machines) \end{itemize} \subsection*{Version 1.0.1} This is a quite large revision of \texttt{ViennaCL 1.0.0}, but mainly improves things under the hood. \begin{itemize} \item Fixed a bug in lu\_substitute for dense matrices \item Changed iterative solver behavior to stop if a certain relative residual is reached \item ILU preconditioning is now fully done on the CPU, because this gives best overall performance \item All OpenCL handles of {\ViennaCL} types can now be accessed via member function \texttt{handle()} \item Improved GPU performance of GMRES by about a factor of two. 
\item Added generic \texttt{norm\_2} function in header file \texttt{norm\_2.hpp} \item Wrapper for \texttt{clFlush()} and \texttt{clFinish()} added \item Device information can be queried by \texttt{device.info()} \item Extended documentation and tutorials \end{itemize} \subsection*{Version 1.0.0} First release ViennaCL-1.5.1-src/doc/manual/kernel-generation.tex000644 001750 001750 00000004367 12235435247 022156 0ustar00rupprupp000000 000000 \chapter{Automated OpenCL User-Kernel Generation} \label{chap:kernel-generation} While {\ViennaCL} provides a convenient means of including custom {\OpenCL} compute kernels, cf.~Chap.~\ref{chap:custom}, it can be rather tedious to come up with a good compute kernel, or to come up with many similar kernels differing in small details only. For the case of BLAS level 1 and level 2 operations, {\ViennaCL} now provides an automated kernel generator, which takes a high-level specification of the operations and create one or more suitable OpenCL kernels. This allows for high-performance implementations of algorithms which may otherwise lead to spurious temporary objects. Consider the operation \begin{align*} \mathbf{x} = \mathbf{A} \times \bigl[ (\mathbf{y} \cdot (\mathbf{y}+\mathbf{z}))\mathbf{y} + \mathbf{z} \bigr] \ , \end{align*} where $\mathbf{x}$, $\mathbf{y}$ and $\mathbf{z}$ denote vectors, $\mathbf{A}$ is a dense matrix, and the dot denotes the vector dot product. With the generator it is sufficient to write the following C++ code in order to obtain an OpenCL kernel: \begin{lstlisting} // Instantiation of the symbolic variables symbolic_vector sX; symbolic_matrix sA; symbolic_vector sY; symbolic_vector sZ; //Creation of the custom operation custom_operation my_op( sX = prod(sA, inner_prod(sY, sY+sZ) * sY + sZ), "operation_name" ); \end{lstlisting} where \lstinline|NumericT| is either \lstinline|float| or \lstinline|double|. 
The string provided as second parameter is required and can be used to identify, manage and retrieve different kernels. No two \lstinline|custom_operation|s are allowed to be identified using the same string. The custom operation object \lstinline|my_op| can be enqueued like any other kernel: \begin{lstlisting} //Execution of the custom operation viennacl::ocl::enqueue(my_op(x,A,y,z)); \end{lstlisting} Here, \lstinline|x|, \lstinline|y|, \lstinline|z| are of type \lstinline|viennacl::vector| and \lstinline|A| is of type \lstinline|viennacl::matrix|. \TIP{Sample code can be found in \lstinline|tests/src/generator_*.cpp|} \NOTE{ The kernel generator is still experimental, yet already able to generate rather complex compute kernels. } ViennaCL-1.5.1-src/doc/manual/introduction.tex000644 001750 001750 00000007717 12255634117 021267 0ustar00rupprupp000000 000000 \chapter*{Introduction} \addcontentsline{toc}{chapter}{Introduction} The Vienna Computing Library (\ViennaCL) is a scientific computing library written in C++. It allows simple, high-level access to the vast computing resources available on parallel architectures such as GPUs and multi-core CPUs by using either a host-based computing backend, an {\OpenCL} computing backend, or {\CUDA}. The primary focus is on common linear algebra operations (BLAS levels 1, 2 and 3) and the solution of large sparse systems of equations by means of iterative methods. In {\ViennaCLminorversion}, the following iterative solvers are implemented (confer for example to the book of Y.~Saad \cite{saad-iterative-solution}): \begin{itemize} \item Conjugate Gradient (CG) \item Stabilized BiConjugate Gradient (BiCGStab) \item Generalized Minimum Residual (GMRES) \end{itemize} A number of preconditioners is provided with {\ViennaCLversion} in order to improve convergence of these solvers, cf.~Chap.~\ref{chap:algorithms}. The solvers and preconditioners can also be used with different libraries due to their generic implementation. 
At present, it is possible to use the solvers and preconditioners directly with types from the {\ublas} library, which is part of {\Boost} \cite{boost}. The iterative solvers can directly be used with Eigen \cite{eigen} and MTL 4 \cite{mtl4}. Under the hood, {\ViennaCL} uses a unified layer to access {\CUDA} \cite{nvidiacuda}, {\OpenCL} \cite{khronoscl}, and/or {\OpenMP} \cite{openmp} for accessing and executing code on compute devices. Therefore, {\ViennaCL} is not tailored to products from a particular vendor and can be used on many different platforms. At present, {\ViennaCL} is known to work on all current CPUs and modern GPUs from NVIDIA and AMD (see Tab.~\ref{tab:double-precision-GPUs}), CPUs using either the AMD Accelerated Parallel Processing (APP) SDK (formerly ATI Stream SDK) or the Intel OpenCL SDK, and Intel's MIC platform (Xeon Phi). \NOTE{Double precision arithmetic on GPUs is only possible if it is provided by the GPU. There is no double precision emulation in {\ViennaCL}.} \NOTE{Double precision arithmetic using the ATI Stream SDK or AMD APP SDK may not be fully OpenCL-certified. 
Also, we have observed bugs in AMD APP SDKs 2.7 which affects some algorithms in {\ViennaCL} (e.g.~BiCGStab).} \begin{table}[tb] \begin{center} \begin{tabular}{l|c|c} Compute Device & float & double \\ \hline \NVIDIA Geforce 86XX GT/GSO & ok & - \\ \NVIDIA Geforce 88XX GTX/GTS & ok & - \\ \NVIDIA Geforce 96XX GT/GSO & ok & - \\ \NVIDIA Geforce 98XX GTX/GTS & ok & - \\ \NVIDIA GT 230 & ok & - \\ \NVIDIA GT(S) 240 & ok & - \\ \NVIDIA GTS 250 & ok & - \\ \NVIDIA GTX 260 & ok & ok \\ \NVIDIA GTX 275 & ok & ok \\ \NVIDIA GTX 280 & ok & ok \\ \NVIDIA GTX 285 & ok & ok \\ \NVIDIA GTX 4XX & ok & ok \\ \NVIDIA GTX 5XX & ok & ok \\ \NVIDIA GTX 6XX & ok & ok \\ \NVIDIA Quadro FX 46XX & ok & - \\ \NVIDIA Quadro FX 48XX & ok & ok \\ \NVIDIA Quadro FX 56XX & ok & - \\ \NVIDIA Quadro FX 58XX & ok & ok \\ \NVIDIA Tesla 870 & ok & - \\ \NVIDIA Tesla C10XX & ok & ok \\ \NVIDIA Tesla C20XX & ok & ok \\ \hline ATI Radeon HD 4XXX & ok & - \\ ATI Radeon HD 48XX & ok & essentially ok \\ ATI Radeon HD 5XXX & ok & - \\ ATI Radeon HD 58XX & ok & essentially ok \\ ATI Radeon HD 59XX & ok & essentially ok \\ ATI Radeon HD 68XX & ok & - \\ ATI Radeon HD 69XX & ok & essentially ok \\ ATI Radeon HD 77XX & ok & - \\ ATI Radeon HD 78XX & ok & - \\ ATI Radeon HD 79XX & ok & essentially ok \\ ATI FireStream V92XX & ok & essentially ok \\ ATI FirePro V78XX & ok & essentially ok \\ ATI FirePro V87XX & ok & essentially ok \\ ATI FirePro V88XX & ok & essentially ok \\ \end{tabular} \caption{Available arithmetics in {\ViennaCL} provided by selected GPUs. 
Some older versions of the Stream SDK (APP SDK) from AMD/ATI may not comply with the {\OpenCL} standard for double precision extensions.} \label{tab:double-precision-GPUs} \end{center} \end{table} ViennaCL-1.5.1-src/doc/manual/types.tex000644 001750 001750 00000055634 12255634117 017713 0ustar00rupprupp000000 000000 \chapter{Basic Types} \label{chap:basic-types} This chapter provides a brief overview of the basic interfaces and usage of the provided data types. Operations on the various types are explained in Chapter \ref{chap:operations}. For full details, refer to the reference pages in the folder \texttt{doc/doxygen}. \section {Scalar Type} The scalar type \lstinline|scalar| with template parameter T denoting the underlying CPU scalar type (\lstinline|char|, \lstinline|short|, \lstinline|int|, \lstinline|long|, \lstinline|float| and \lstinline|double|, if supported - see Tab.~\ref{tab:double-precision-GPUs}) represents a single scalar value on the computing device. \lstinline|scalar| is designed to behave much like a scalar type on conventional host-based CPU processing, but library users have to keep in mind that every operation on \lstinline|scalar| may require the launch of an appropriate compute kernel on the GPU, thus making the operation much slower than the conventional CPU equivalent. Even if the host-based computing backend of {\ViennaCL} is used, some (small) overheads occur. \NOTE{Be aware that operations between objects of type \lstinline|scalar| (e.g.~additions, comparisons) have large overhead on GPU backends. 
A separate compute kernel launch is required for every operation in such a case.} \subsection{Example Usage} The scalar type of {\ViennaCL} can be used just like the built-in types, as the following snippet shows: \begin{lstlisting} float cpu_float = 42.0f; double cpu_double = 13.7603; viennacl::scalar<float> gpu_float(3.1415f); viennacl::scalar<double> gpu_double = 2.71828; //conversions cpu_float = gpu_float; gpu_float = cpu_double; //automatic transfer and conversion cpu_float = gpu_float * 2.0f; cpu_double = gpu_float - cpu_float; \end{lstlisting} Mixing built-in types with the {\ViennaCL} scalar is usually not a problem. Nevertheless, since every operation requires {\OpenCL} calls, such arithmetics should be used sparingly. \NOTE{In the present version of {\ViennaCL}, it is not possible to assign a \lstinline|scalar<float>| to a \lstinline|scalar<double>| directly.} \NOTE{Mixing operations between objects of different scalar types is not supported. Convert the data manually on the host if needed.} \subsection{Members} Apart from suitably overloaded operators that mimic the behavior of the respective CPU counterparts, only a single public member function \lstinline|handle()| is available, cf.~Tab.~\ref{tab:scalar-interface}. \begin{table}[tb] \begin{center} \begin{tabular}{p{6.5cm}|p{8.5cm}} Interface & Comment\\ \hline \texttt{v.handle()} & The memory handle (CPU, {\CUDA}, or {\OpenCL}) \\ \end{tabular} \caption{Interface of \texttt{scalar$<$T$>$} in \ViennaCL. Destructors and operator overloads for BLAS are not listed.} \label{tab:scalar-interface} \end{center} \end{table} \section{Vector Type} The main vector type in {\ViennaCL} is \texttt{vector$<$T, alignment$>$}, representing a chunk of memory on the compute device. \texttt{T} is the underlying scalar type (\lstinline|char|, \lstinline|short|, \lstinline|int|, \lstinline|long|, either \lstinline|float|, or \lstinline|double| if supported, cf.~Tab.~\ref{tab:double-precision-GPUs}, complex types are not supported in \ViennaCLversion). 
The second template argument \texttt{alignment} is deprecated and should not be provided by the library user. At construction, \texttt{vector$<$T, alignment$>$} is initialized to have the supplied length, but the memory is not initialized to zero. Another difference to CPU implementations is that accessing single vector elements is very costly, because every time an element is accessed, it has to be transferred from the CPU to the compute device or vice versa. \subsection{Example Usage} The following code snippet shows the typical use of the vector type provided by {\ViennaCL}. The overloaded function \texttt{copy()} function, which is used similar to \lstinline|std::copy()| from the C++ Standard Template Library (STL), should be used for writing vector entries: \begin{lstlisting} std::vector stl_vec(10); viennacl::vector vcl_vec(10); //fill the STL vector: for (size_t i=0; i$} is initialized to have the supplied length, but memory is not initialized. If initialization is desired, the memory can be initialized with zero values using the member function \lstinline|clear()|. See Tab.~\ref{tab:vector-interface} for other member functions. \NOTE{Accessing single elements of a vector using operator() or operator[] is very slow for GPUs due to PCI-Express latency! Use with care!} \begin{table}[tb] \begin{center} \begin{tabular}{p{6.5cm}|p{8.5cm}} Interface & Comment\\ \hline \texttt{CTOR(n)} & Constructor with number of entries \\ \texttt{v(i)} & Access to the $i$-th element of v (slow for GPUs!) \\ \texttt{v[i]} & Access to the $i$-th element of v (slow for GPUs!) \\ \texttt{v.clear()} & Initialize v with zeros \\ \texttt{v.resize(n, bool preserve)} & Resize v to length n. Preserves old values if bool is true. 
\\ \texttt{v.begin()} & Iterator to the begin of the matrix \\ \texttt{v.end()} & Iterator to the end of the matrix \\ \texttt{v.size()} & Length of the vector \\ \texttt{v.swap(v2)} & Swap the content of v with v2 \\ \texttt{v.internal\_size()} & Returns the number of entries allocated on the GPU (taking alignment into account) \\ \texttt{v.empty()} & Shorthand notation for \texttt{v.size() == 0} \\ \texttt{v.clear()} & Sets all entries in v to zero \\ \texttt{v.handle()} & Returns the memory handle (needed for custom kernels, see Chap.~\ref{chap:custom}) \end{tabular} \caption{Interface of \texttt{vector$<$T$>$} in \ViennaCL. Destructors and operator overloads for BLAS are not listed.} \label{tab:vector-interface} \end{center} \end{table} One important difference to pure CPU implementations is that the bracket operator as well as the parenthesis operator are very slow, because for each access an {\OpenCL} data transfer has to be initiated. The overhead of this transfer is orders of magnitude. For example: \begin{lstlisting} // fill a vector on CPU for (size_t i=0; i$} represents a dense matrix with interface listed in Tab.~\ref{tab:matrix-interface}. The second optional template argument \texttt{F} specifies the storage layout and defaults to \texttt{row\_major}. As an alternative, a \lstinline|column_major| memory layout can be used. The third template argument \texttt{alignment} denotes an alignment for the rows and columns for row-major and column-major memory layout and should no longer be specified by the user (cf.~\texttt{alignment} for the \texttt{vector} type). \subsection{Example Usage} The use of \texttt{matrix$<$T, F$>$} is similar to that of the counterpart in {\ublas}. The operators are overloaded similarly. 
\begin{lstlisting} //set up a 4 by 5 matrix: viennacl::matrix vcl_matrix(4, 5); //fill it up: vcl_matrix(0,2) = 1.0; vcl_matrix(1,2) = -1.5; vcl_matrix(2,0) = 4.2; vcl_matrix(3,4) = 3.1415; \end{lstlisting} \NOTE{Accessing single elements of a matrix using \texttt{operator()} is very slow on GPU backends! Use with care!} A much better way is to initialize a dense matrix using the provided \texttt{copy()} function: \begin{lstlisting} //copy content from CPU matrix to GPU matrix copy(cpu_matrix, gpu_matrix); //copy content from GPU matrix to CPU matrix copy(gpu_matrix, cpu_matrix); \end{lstlisting} The type requirement on the \texttt{cpu\_matrix} is that \texttt{operator()} can be used for accessing entries, that a member function \texttt{size1()} returns the number of rows and that \texttt{size2()} returns the number of columns. Please refer to Chap.~\ref{chap:other-libs} for an overview of other libraries for which an overload of \texttt{copy()} is provided. \NOTE{The internal memory buffer of a \lstinline|matrix<>| is by default padded with zeros so that the internal matrix size is a multiple of e.g.~a power of two.} \subsection{Members} The members are listed in Tab.~\ref{tab:matrix-interface}. The usual operator overloads are not listed explicitly. \begin{table}[tb] \begin{center} \begin{tabular}{p{6.5cm}|p{8.5cm}} Interface & Comment\\ \hline \texttt{CTOR(nrows, ncols)} & Constructor with number of rows and columns \\ \texttt{mat(i,j)} & Access to the element in the $i$-th row and the $j$-th column of mat \\ %\texttt{mat.clear()} & Initialize mat with zeros \\ \parbox{6cm}{\texttt{mat.resize(m, n, \\ \hphantom{mat.resize(}bool preserve)}} & Resize mat to m rows and n columns. Currently, the boolean flag is ignored and entries always discarded. 
\\ %\texttt{mat.begin()} & Iterator to the begin of the matrix \\ %\texttt{mat.end()} & Iterator to the end of the matrix \\ \texttt{mat.size1()} & Number of rows in mat \\ \texttt{mat.internal\_size1()} & Internal number of rows in mat \\ \texttt{mat.size2()} & Number of columns in mat \\ \texttt{mat.internal\_size2()} & Internal number of columns in mat \\ \texttt{mat.clear()} & Sets all entries in mat to zero \\ \texttt{mat.handle()} & Returns the memory handle (needed for custom kernels, see Chap.~\ref{chap:custom}) \end{tabular} \caption{Interface of the dense matrix type \texttt{matrix$<$T, F$>$} in \ViennaCL. Constructors, destructors and operator overloads for BLAS are not listed.} \label{tab:matrix-interface} \end{center} \end{table} \section{Sparse Matrix Types} There are five different sparse matrix types provided in {\ViennaCL}, \lstinline|compressed_matrix|, \lstinline|coordinate_matrix|, \lstinline|ell_matrix|, \lstinline|hyb_matrix|, and \lstinline|compressed_compressed_matrix|. \subsection{Compressed Matrix} \texttt{compressed\_matrix$<$T, alignment$>$} represents a sparse matrix using a compressed sparse row scheme. Again, \texttt{T} is the floating point type. \texttt{alignment} is the alignment and defaults to \texttt{1} at present. In general, sparse matrices should be set up on the CPU and then be pushed to the compute device using \texttt{copy()}, because dynamic memory management of sparse matrices is not provided on {\OpenCL} compute devices such as GPUs. \begin{table}[tb] \begin{center} \begin{tabular}{p{6.5cm}|p{8cm}} Interface & Comment\\ \hline \texttt{CTOR(nrows, ncols)} & Constructor with number of rows and columns \\ %\texttt{mat(i,j) const} & Read-only access to the element in the $i$-th row and the $j$-th column of mat \\ %\texttt{mat.readFrom(PointerType prows, PointerType pcols, PointerType pentries)} & Fill mat with the values from the supplied piece of memory. 
\\ %\texttt{mat.writeTo(PointerType prows, PointerType pcols, PointerType pentries)} & Fill the supplied piece of memory with values from mat. \\ \\ \texttt{mat.set()} & Initialize mat with the data provided as arguments \\ \texttt{mat.reserve(num)} & Reserve memory for up to \texttt{num} nonzero entries \\ %\texttt{mat.clear()} & Initialize mat with zeros \\ \texttt{mat.size1()} & Number of rows in mat \\ \texttt{mat.size2()} & Number of columns in mat \\ \texttt{mat.nnz()} & Number of nonzeroes in mat \\ \parbox{6cm}{\texttt{mat.resize(m, n, \\ \hphantom{mat.resize(}bool preserve)}} & Resize mat to m rows and n columns. Currently, the boolean flag is ignored and entries always discarded. \\ \texttt{mat.handle1()} & Returns the memory handle holding the row indices (needed for custom kernels, see Chap.~\ref{chap:custom}) \\ \texttt{mat.handle2()} & Returns the memory handle holding the column indices (needed for custom kernels, see Chap.~\ref{chap:custom}) \\ \texttt{mat.handle()} & Returns the memory handle holding the entries (needed for custom kernels, see Chap.~\ref{chap:custom}) \end{tabular} \caption{Interface of the sparse matrix type \texttt{compressed\_matrix$<$T, F$>$} in \ViennaCL. Destructors and operator overloads for BLAS are not listed.} \label{tab:compressed-matrix-interface} \end{center} \end{table} \subsubsection{Example Usage} \label{sec:compressed-matrix-example} The use of \texttt{compressed\_matrix$<$T, alignment$>$} is similar to that of the counterpart in {\ublas}. The operators are overloaded similarly. 
There is a direct interface to the standard implementation using a vector of maps from the STL: \begin{lstlisting} //set up a sparse 4 by 5 matrix on the CPU: std::vector< std::map< unsigned int, float> > cpu_sparse_matrix(4); //fill it up: cpu_sparse_matrix[0][2] = 1.0; cpu_sparse_matrix[1][2] = -1.5; cpu_sparse_matrix[3][0] = 4.2; //set up a sparse ViennaCL matrix: viennacl::compressed_matrix<float> vcl_sparse_matrix(4, 5); //copy to OpenCL device: copy(cpu_sparse_matrix, vcl_sparse_matrix); //copy back to CPU: copy(vcl_sparse_matrix, cpu_sparse_matrix); \end{lstlisting} The \texttt{copy()} functions can also be used with a generic sparse matrix data type fulfilling the following requirements: \begin{itemize} \item The \texttt{const\_iterator1} type is provided for iteration along increasing row index \item The \texttt{const\_iterator2} type is provided for iteration along increasing column index \item \texttt{.begin1()} returns an iterator pointing to the element with indices $(0,0)$. \item \texttt{.end1()} returns an iterator pointing to the end of the first column \item When copying to the cpu type: Write operation via \texttt{operator()} \item When copying to the cpu type: \texttt{resize(m,n,preserve)} member (cf.~Tab.~\ref{tab:compressed-matrix-interface}) \end{itemize} The iterator returned from the cpu sparse matrix type via \texttt{begin1()} has to fulfill the following requirements: \begin{itemize} \item \texttt{.begin()} returns a column iterator pointing to the first nonzero element in the particular row. \item \texttt{.end()} returns an iterator pointing to the end of the row \item Increment and dereference \end{itemize} For the sparse matrix types in {\ublas}, these requirements are all fulfilled. Please refer to Chap.~\ref{chap:other-libs} for an overview of other libraries for which an overload of \texttt{copy()} is provided. \subsubsection{Members} The interface is described in Tab.~\ref{tab:compressed-matrix-interface}. 
\subsection{Coordinate Matrix} In the second sparse matrix type, \texttt{coordinate\_matrix$<$T, alignment$>$}, entries are stored as triplets \texttt{(i,j,val)}, where \texttt{i} is the row index, \texttt{j} is the column index and \texttt{val} is the entry. Again, \texttt{T} is the floating point type. The optional \texttt{alignment} defaults to \texttt{1} at present. In general, sparse matrices should be set up on the CPU and then be pushed to the compute device using \texttt{copy()}, because dynamic memory management of sparse matrices is not provided on {\OpenCL} compute devices such as GPUs. \begin{table}[tb] \begin{center} \begin{tabular}{p{6.5cm}|p{8cm}} Interface & Comment\\ \hline \texttt{CTOR(nrows, ncols)} & Constructor with number of rows and columns \\ %\texttt{mat(i,j) const} & Read-only access to the element in the $i$-th row and the $j$-th column of mat \\ %\texttt{mat.readFrom(PointerType prows, PointerType pcols, PointerType pentries)} & Fill mat with the values from the supplied piece of memory. \\ %\texttt{mat.writeTo(PointerType prows, PointerType pcols, PointerType pentries)} & Fill the supplied piece of memory with values from mat. \\ \\ \texttt{mat.reserve(num)} & Reserve memory for \texttt{num} nonzero entries \\ \texttt{mat.size1()} & Number of rows in mat \\ \texttt{mat.size2()} & Number of columns in mat \\ \texttt{mat.nnz()} & Number of nonzeroes in mat \\ \parbox{6cm}{\texttt{mat.resize(m, n, \\ \hphantom{mat.resize(}bool preserve)}} & Resize mat to m rows and n columns. Currently, the boolean flag is ignored and entries always discarded. \\ %\texttt{mat.clear()} & Initialize mat with zeros \\ \texttt{mat.resize(m, n)} & Resize mat to m rows and n columns. Does not preserve old values. 
\\ \texttt{mat.handle12()} & Returns the memory handle holding the row and column indices (needed for custom kernels, see Chap.~\ref{chap:custom}) \\ \texttt{mat.handle()} & Returns the memory handle holding the entries (needed for custom kernels, see Chap.~\ref{chap:custom}) \end{tabular} \caption{Interface of the sparse matrix type \texttt{coordinate\_matrix$<$T, A$>$} in \ViennaCL. Destructors and operator overloads for BLAS are not listed.} \label{tab:coordinate-matrix-interface} \end{center} \end{table} \subsubsection{Example Usage} The use of \texttt{coordinate\_matrix$<$T, alignment$>$} is similar to that of the first sparse matrix type \texttt{compressed\_matrix$<$T, alignment$>$}, thus we refer to Sec.~\ref{sec:compressed-matrix-example} \subsubsection{Members} The interface is described in Tab.~\ref{tab:coordinate-matrix-interface}. %\TIP{In {\ViennaCLversion} the use of \lstinline|compressed\_matrix| over \lstinline|coordinate\_matrix| is encouraged due to better performance!} \NOTE{Note that only a few preconditioners work with \lstinline|coordinate_matrix| so far, cf.~ Sec.~\ref{sec:preconditioner}.} \subsection{ELL Matrix} A sparse matrix in ELL format of type \lstinline|ell_matrix| is stored in a block of memory of size $N \times n_{\max}$, where $N$ is the number of rows of the matrix and $n_{\max}$ is the maximum number of nonzeros per row. Rows with less than $n_{\max}$ entries are padded with zeros. In a second memory block, the respective column indices are stored. The ELL format is well suited for matrices where most rows have approximately the same number of nonzeros. This is often the case for matrices arising from the discretization of partial differential equations using e.g.~the finite element method. On the other hand, the ELL format introduces substantial overhead if the number of nonzeros per row varies a lot. For an example use of an \lstinline|ell_matrix|, have a look at \lstinline|examples/benchmarks/sparse.cpp|. 
\NOTE{Note that preconditioners in Sec.~\ref{sec:preconditioner} do not work with \lstinline|ell_matrix| yet.} \subsection{Hybrid Matrix} The higher performance of the ELL format for matrices with approximately the same number of entries per row and the higher flexibility of the CSR format are combined in the \lstinline|hyb_matrix| type, where the main part of the system matrix is stored in ELL format and excess entries are stored in CSR format. For an example use of a \lstinline|hyb_matrix|, have a look at \lstinline|examples/benchmarks/sparse.cpp|. \NOTE{Note that preconditioners in Sec.~\ref{sec:preconditioner} do not work with \lstinline|hyb_matrix| yet.} \subsection{Compressed Compressed Matrix} If only a few rows of a sparse matrix are populated, then the previous sparse matrix formats are fairly expensive in terms of memory consumption. This is addressed by the \lstinline|compressed_compressed_matrix<>| format, which is similar to the standard CSR format, but only stores the rows containing nonzero elements. An additional array is used to store the global row index $r$ in the sparse matrix $A$ of the $i$-th nonzero row. \NOTE{Note that preconditioners in Sec.~\ref{sec:preconditioner} do not work with \lstinline|compressed_compressed_matrix| yet.} \section{Proxies} Similar to {\ublas}, {\ViennaCL} provides \lstinline|range| and \lstinline|slice| objects in order to conveniently manipulate dense submatrices and vectors. The functionality is provided in the headers \lstinline|viennacl/vector_proxy.hpp| and \lstinline|viennacl/matrix_proxy.hpp| respectively. A range refers to a contiguous integer interval and is set up as \begin{lstlisting} std::size_t lower_bound = 1; std::size_t upper_bound = 7; viennacl::range r(lower_bound, upper_bound); \end{lstlisting} A slice is similar to a range and allows in addition for arbitrary increments (\emph{stride}). 
For example, to create a slice consisting of the indices $2, 5, 8, 11, 14$, the code \begin{lstlisting} std::size_t start = 2; std::size_t stride = 3; std::size_t size = 5; viennacl::slice s(start, stride, size); \end{lstlisting} can be used. In order to address a subvector of a vector \lstinline|v| and a submatrix of a matrix \lstinline|M|, the proxy objects \lstinline|v_sub| and \lstinline|M_sub| are created as follows: \begin{lstlisting} typedef viennacl::vector<ScalarType> VectorType; typedef viennacl::matrix<ScalarType> MatrixType; viennacl::vector_range<VectorType> v_sub(v, r); viennacl::matrix_range<MatrixType> M_sub(M, r, r); \end{lstlisting} As a shortcut, one may use the free function \lstinline|project()| in order to avoid having to write the type explicitly: \begin{lstlisting} project(v, r); //returns a vector_range as above project(M, r, r); //returns a matrix_range as above \end{lstlisting} In the same way \lstinline|vector_slice|s and \lstinline|matrix_slice|s are set up. The proxy objects can now be manipulated in the same way as vectors and dense matrices. In particular, operations such as vector proxy additions and matrix additions work as usual, e.g. \begin{lstlisting} v_sub += v_sub; //or project(v, r) += project(v, r); M_sub += M_sub; //or project(M, r, r) += project(M, r, r); \end{lstlisting} Submatrix-Submatrix products are computed in the same manner and are handy for many block-based linear algebra algorithms. \TIP{Example code can be found in \lstinline|examples/tutorial/vector-range.cpp| and \lstinline|examples/tutorial/matrix-range.cpp|} ViennaCL-1.5.1-src/doc/manual/custom-contexts.tex000644 001750 001750 00000012733 12235435247 021720 0ustar00rupprupp000000 000000 \chapter{Using ViennaCL in User-Provided OpenCL Contexts} \label{chap:custom-contexts} Many projects need similar basic linear algebra operations, but essentially operate in their own {\OpenCL} context.
To provide the functionality and convenience of {\ViennaCL} to such existing projects, existing contexts can be passed to {\ViennaCL} and memory objects can be wrapped into the basic linear algebra types \lstinline|vector|, \lstinline|matrix| and \lstinline|compressed_matrix|. This chapter is devoted to the description of the necessary steps to use {\ViennaCL} on contexts provided by the library user. \TIP{An example of providing a custom context to {\ViennaCL} can be found in \texttt{examples/tutorial/custom-contexts.cpp}} \section{Passing Contexts to ViennaCL} {\ViennaCLversion} is able to handle an arbitrary number of contexts, which are identified by a key value of type \lstinline|long|. By default, {\ViennaCL} operates on the context identified by $0$, unless the user switches the context, cf.~Chapter \ref{chap:multi-devices}. According to the {\OpenCL} standard, a context contains devices and queues for each device. Thus, it is assumed in the following that the user has successfully created a context with one or more devices and one or more queues per device. In the case that the context contains only one device \lstinline|my_device| and one queue \lstinline|my_queue|, the context can be passed to {\ViennaCL} with the code \begin{lstlisting} cl_context my_context = ...; //a context cl_device_id my_device = ...; //a device in my_context cl_command_queue my_queue = ...; //a queue for my_device //supply existing context 'my_context' // with one device and one queue to ViennaCL using id '0': viennacl::ocl::setup_context(0, my_context, my_device, my_queue); \end{lstlisting} If a context ID other than \texttt{0}, say, \lstinline|id| is used, the user-defined context has to be selected using \begin{lstlisting} viennacl::ocl::switch_context(id); \end{lstlisting} It is also possible to provide a context with several devices and multiple queues per device. 
To do so, the device IDs have to be stored in an STL vector and the queues in an STL map: \begin{lstlisting} cl_context my_context = ...; //a context cl_device_id my_device1 = ...; //a device in my_context cl_device_id my_device2 = ...; //another device in my_context ... cl_command_queue my_queue1 = ...; //a queue for my_device1 cl_command_queue my_queue2 = ...; //another queue for my_device1 cl_command_queue my_queue3 = ...; //a queue for my_device2 ... //setup existing devices for ViennaCL: std::vector<cl_device_id> my_devices; my_devices.push_back(my_device1); my_devices.push_back(my_device2); ... //setup existing queues for ViennaCL: std::map<cl_device_id, std::vector<cl_command_queue> > my_queues; my_queues[my_device1].push_back(my_queue1); my_queues[my_device1].push_back(my_queue2); my_queues[my_device2].push_back(my_queue3); ... //supply existing context with multiple devices //and queues to ViennaCL using id '0': viennacl::ocl::setup_context(0, my_context, my_devices, my_queues); \end{lstlisting} It is not necessary to pass all devices and queues created within a particular context to {\ViennaCL}, only those which {\ViennaCL} should use have to be passed. {\ViennaCL} will by default use the first queue on each device. The user has to care for appropriate synchronization between different queues. \TIP{{\ViennaCL} does not destroy the provided context automatically upon exit. The user should thus call \lstinline|clReleaseContext()| as usual for destroying the context.} \section{Wrapping Existing Memory with ViennaCL Types} Now that the user-provided context has been supplied to {\ViennaCL}, user-created memory objects have to be wrapped into {\ViennaCL} data-types in order to use the full functionality.
Typically, one of the types \lstinline|scalar|, \lstinline|vector|, \lstinline|matrix| and \lstinline|compressed_matrix| is used: \begin{lstlisting} cl_mem my_memory1 = ...; cl_mem my_memory2 = ...; cl_mem my_memory3 = ...; cl_mem my_memory4 = ...; cl_mem my_memory5 = ...; //wrap my_memory1 into a vector of size 10 viennacl::vector<ScalarType> my_vec(my_memory1, 10); //wrap my_memory2 into a row-major matrix of size 10x10 viennacl::matrix<ScalarType> my_matrix(my_memory2, 10, 10); //wrap my_memory3 into a CSR sparse matrix with 10 rows and 20 nonzeros viennacl::compressed_matrix<ScalarType> my_sparse(my_memory3, my_memory4, my_memory5, 10, 10, 20); //use my_vec, my_matrix, my_sparse as usual \end{lstlisting} The following has to be emphasized: \begin{itemize} \item Resize operations on {\ViennaCL} data types typically result in the object owning a new piece of memory. \item copy() operations from CPU RAM usually allocate new memory, so wrapped memory is ``forgotten''. \item On construction of the {\ViennaCL} object, \lstinline|clRetainMemObject()| is called once for the provided memory handle. Similarly, \lstinline|clReleaseMemObject()| is called as soon as the memory is not used any longer. \end{itemize} \NOTE{The user has to ensure that the provided memory is larger than or equal to the size of the wrapped object.} \NOTE{Be aware that wrapping the same memory object into several different {\ViennaCL} objects can have unwanted side-effects.
In particular, wrapping the same memory in two {\ViennaCL} vectors implies that if the entries of one of the vectors is modified, this is also the case for the second.} ViennaCL-1.5.1-src/doc/manual/keywords.tex000644 001750 001750 00000055223 12235435247 020411 0ustar00rupprupp000000 000000 % -*- mode: LaTeX -*- % $Id: keywords.tex,v 1.6 2006/03/22 12:48:58 entner Exp $ % Specific text elements \newcommand{\specific}[1] {\textit{#1}} \newcommand{\extension}[1] {\textit{.#1}} \newcommand{\email}[1] {#1} % Keywords and Indexing %%%%%%%%%%%%%%%%%%%%%%% % Enter keyword into keyword index \newcommand{\keywordidx}[1] {\index[idxkey]{#1@\texttt{#1}}} % Print keyword within paragraph \newcommand{\keyword}[1] {\texttt{#1}} % Print keyword and enter to keyword index \newcommand{\keywordI}[1] {\texttt{#1}{\keywordidx{#1}}} % Print material parameter and enter to the mat.par.index \newcommand{\ipdkeywordI}[1] {\texttt{#1}{\index[idxipd]{#1@\texttt{#1}}}} % Print bold keyword and enter to the keyword index %\newcommand{\bold}[1] {{\bf \hyperpage{#1}}} \newcommand{\keywordIB}[1] {\texttt{#1}{\keywordidx{#1}}} % Enter bold keyword to general index \newcommand{\indexbold}[1] {\index{#1|bold}} % Print keyword and add to a second keyword in the keyword index: \newcommand{\keywordII}[2] {\texttt{#2}{\index[idxkey]{#1@\texttt{#2}}}} % Environments for the model %%%%%%%%%%%%%%%%%%%%%%%%%%%% \newcommand{\psin}[1] {\varphi_{\mathrm{#1}}} \newcommand{\IR}[1] {I^R_\mathrm{N_#1}(\psin{1},\psin{2})} \newcommand{\rhs}[1] {\mathrm{rhs[N_#1]}} \newcommand{\Y}[2] {\mathrm{Y[N_#1][N_#2]}} \newcommand{\mdlalphachar}{\mbox{\mdltt{[a-zA-Z]}}} \newcommand{\mdlextendchar}{\mbox{\mdltt{[a-zA-Z0-9\_{\mdlbackslash}]}}} \newcommand{\MdlBool}{\mdltt{Mdl\-Bool}} \newcommand{\MdlString}{\mdltt{Mdl\-String}} \newcommand{\MmuMdlKeyword}{\mdltt{Mmu\-Mdl\-Key\-word}} \newcommand{\MmuMdlKeywordList}{\mdltt{MmuMdlKeywordList}} \newcommand{\mdlAliasedModel}{\mdlkeyw{AliasedModel}} 
\newcommand{\mdlEnd}{\mdlkeyw{End}} \newcommand{\mdlinclude}{\mdlkeyw{\#include}} \newcommand{\mdlInstance}{\mdlkeyw{Instance}} \newcommand{\mdlinstantiate}{\mdlkeyw{instantiate}} \newcommand{\mdlInterface}{\mdlkeyw{Interface}} \newcommand{\mdlLinkMap}{\mdlkeyw{LinkMap}} \newcommand{\mdlLoadObjectLibrary}{\mdlkeyw{LoadObjectLibrary}} \newcommand{\mdlLocal}{\mdlkeyw{Local}} \newcommand{\mdlModel}{\mdlkeyw{Model}} \newcommand{\mdlNewModel}{\mdlkeyw{New\-Model}} \newcommand{\mdlParameter}{\mdlkeyw{Parameter}} \newcommand{\mdlParameters}{\mdlkeyw{Parameters}} \newcommand{\mdlAnyParameter}{\mdlkeyw{AnyParameter}} \newcommand{\mdlSelect}{\mdlkeyw{Select}} \newcommand{\mdlStatic}{\mdlkeyw{Static}} \newcommand{\mdlbreak}{\mdlkeyw{break}} \newcommand{\mdlcalc}{\mdlkeyw{calc}} \newcommand{\mdlcall}{\mdlkeyw{call}} \newcommand{\mdlconstruct}{\mdlkeyw{construct}} \newcommand{\mdlcontinue}{\mdlkeyw{continue}} \newcommand{\mdldestruct}{\mdlkeyw{destruct}} \newcommand{\mdldo}{\mdlkeyw{do}} \newcommand{\mdlelse}{\mdlkeyw{else}} \newcommand{\mdlerrors}{\mdlkeyw{errors}} \newcommand{\mdlevaluate}{\mdlkeyw{evaluate}} \newcommand{\mdlfalse}{\mdlkeyw{false}} \newcommand{\mdlfor}{\mdlkeyw{for}} \newcommand{\mdlgenLibrary}{\mdlkeyw{genLibrary}} \newcommand{\mdlif}{\mdlkeyw{if}} \newcommand{\mdlinitialize}{\mdlkeyw{initialize}} \newcommand{\mdllink}{\mdlkeyw{link}} \newcommand{\mdllistModels}{\mdlkeyw{listModels}} \newcommand{\mdlmdlLibraryName}{\mdlkeyw{mdlLibraryName}} \newcommand{\mdlmethod}{\mdlkeyw{method}} \newcommand{\mdlof}{\mdlkeyw{of}} \newcommand{\mdlparser}{\mdlkeyw{parser}} \newcommand{\mdlprivate}{\mdlkeyw{private}} \newcommand{\mdlprotected}{\mdlkeyw{protected}} \newcommand{\mdlquiet}{\mdlkeyw{quiet}} \newcommand{\mdlreturn}{\mdlkeyw{return}} \newcommand{\mdlscanner}{\mdlkeyw{scanner}} \newcommand{\mdlset}{\mdlkeyw{set}} \newcommand{\mdlto}{\mdlkeyw{to}} \newcommand{\mdltrue}{\mdlkeyw{true}} \newcommand{\mdlverbose}{\mdlkeyw{verbose}} \newcommand{\mdlwhile}{\mdlkeyw{while}} 
\newcommand{\mdlCompilerProject}{\mdlkeyw{CompilerProject}} \newcommand{\mdlcompile}{\mdlkeyw{compile}} \newcommand{\mdlInfo}{\mdlkeyw{Info}} \newcommand{\mdlundef}{\mdlkeyw{undef}} \newcommand{\mdlModelClassDefinitionExtensions}{\mdltt{MODEL\_CLASS\_DEF\-INIT\-ION\_EX\-TENS\-IONS}} \newcommand{\mdlModelClassStdConstructor}{\mdltt{Model\-Class\-Std\-Con\-struc\-tor}} \newcommand{\mdlModelClassStdDestructor}{\mdltt{Model\-Class\-Std\-De\-struc\-tor}} \newcommand{\mdlModelClassStdDeclarations}{\mdltt{Model\-Class\-Std\-Dec\-larat\-ions}} \newcommand{\mdlModelClassInitExtension}{\mdltt{MODEL\_\-CLASS\_\-INIT\_\-EXT\-ENSION}} \newcommand{\MDLPREFIXSTRING}{\mdltt{MDL\-PRE\-FIX\-STRING}} \newcommand{\Model}{\mdltt{Model}} \newcommand{\Parameter}{\mdltt{Parameter}} \newcommand{\Interface}{\mdltt{Interface}} \newcommand{\mdlbackslash}{\ensuremath{\mathtt{\backslash}}} \newcommand{\divop}{\mathop{\rm div}} \newcommand{\gradop}{\mathop{\rm grad}} \newcommand{\asinh}{\mathop{\rm asinh}} \newcommand{\unix}{\progname{Unix}} \newcommand{\windows}{\progname{Windows-NT}} % Some derived tables \newenvironment{fixedwidthtable} [2] {\begin{mmnttable} {|l|p{10cm}|} {#1} {#2}} {\end{mmnttable}} \newenvironment{fixedwidthtablep} [3] {\begin{mmnttable} {|l|p{#3}|} {#1} {#2}} {\end{mmnttable}} \newenvironment{fixedwidthtableL} [3] {\begin{mmnttableL} {|l|p{10cm}|} {#1} {#2}{#3}} {\end{mmnttableL}} \newenvironment{fixedwidthTablep} [3] {\begin{mmntTable} {|l|p{#3}|} {#1} {#2}} {\end{mmntTable}} \newenvironment{fixedwidthTableL} [3] {\begin{mmntTableL} {|l|p{10cm}|} {#1} {#2}{#3}} {\end{mmntTableL}} \newenvironment{keydesctableII} [1] {\begin{mmnttable} {|l|p{10cm}|} {Keyword & Description} {#1}} {\end{mmnttable}} \newenvironment{keydesctableIIL} [2] {\begin{mmnttableL} {|l|p{10cm}|} {Keyword & Description} {#1}{#2}} {\end{mmnttableL}} \newenvironment{keydesctableIILp} [3] {\begin{mmnttableL} {|l|p{#3}|} {Keyword & Description} {#1}{#2}} {\end{mmnttableL}} \newenvironment{keydesctableIII} 
[1] {\begin{mmnttable} {|l|l|l|} {Keyword & Type & Description} {#1}} {\end{mmnttable}} \newenvironment{keydesctableIIIL} [2] {\begin{mmnttableL} {|l|l|l|} {Keyword & Type & Description} {#1}{#2}} {\end{mmnttableL}} \newenvironment{keydesctableIIILp} [3] {\begin{mmnttableL} {|l|l|p{#3}|} {Keyword & Type & Description} {#1}{#2}} {\end{mmnttableL}} \newenvironment{keydesctableIV} [1] {\begin{mmnttable} {|l|l|l|l|} {Keyword & Type & Description & Unit} {#1}} {\end{mmnttable}} \newenvironment{keydesctableIVL} [2] {\begin{mmnttableL} {|l|l|l|l|} {Keyword & Type & Description & Unit} {#1}{#2}} {\end{mmnttableL}} \newenvironment{keydescTableII} [1] {\begin{mmntTable} {|l|p{10cm}|} {Keyword & Description} {#1}} {\end{mmntTable}} \newenvironment{keydescTableIIL} [2] {\begin{mmntTableL} {|l|p{10cm}|} {Keyword & Description} {#1}{#2}} {\end{mmntTableL}} \newenvironment{keydescTableIII} [1] {\begin{mmntTable} {|l|l|l|} {Keyword & Type & Description} {#1}} {\end{mmntTable}} \newenvironment{keydescTableIIIL} [2] {\begin{mmntTableL} {|l|l|l|} {Keyword & Type & Description} {#1}{#2}} {\end{mmntTableL}} \newenvironment{keydescTableIIILp} [3] {\begin{mmntTableL} {|l|l|p{#3}|} {Keyword & Type & Description} {#1}{#2}} {\end{mmntTableL}} \newenvironment{keydescTableIV} [1] {\begin{mmntTable} {|l|l|l|l|} {Keyword & Type & Description & Unit} {#1}} {\end{mmntTable}} \newenvironment{keydescTableIVL} [2] {\begin{mmntTableL} {|l|l|l|l|} {Keyword & Type & Description & Unit} {#1}{#2}} {\end{mmntTableL}} \newenvironment{keytypetableII} [1] {\begin{mmnttable} {|l|l|} {Keyword & Type} {#1}} {\end{mmnttable}} \newenvironment{keytypetableIIL} [2] {\begin{mmnttableL} {|l|l|} {Keyword & Type} {#1}{#2}} {\end{mmnttableL}} \newenvironment{keyunittableI} [1] {\begin{mmnttable} {|l|l|l|} {Keyword & Type & Unit} {#1}} {\end{mmnttable}} \newenvironment{parameterdescrtable} [1] {\begin{mmnttable} {|l|l|} {Parameter & Description} {#1}} {\end{mmnttable}} \newenvironment{parameterdescrtableL}[2] 
{\begin{mmnttableL} {|l|l|} {Parameter & Description} {#1}{#2}} {\end{mmnttableL}} \newenvironment{symbolkeytableII} [1] {\begin{mmnttable} {|l|l|} {Symbol & Keyword} {#1}} {\end{mmnttable}} \newenvironment{symbolkeytableIIL} [2] {\begin{mmnttableL} {|l|l|} {Symbol & Keyword} {#1}{#2}} {\end{mmnttableL}} \newenvironment{symbolkeytableIII} [1] {\begin{mmnttable} {|l|l|l|} {Symbol & Keyword & Type} {#1}} {\end{mmnttable}} \newenvironment{symbolkeyTableIII} [1] {\begin{mmntTable} {|l|l|l|} {Symbol & Keyword & Type} {#1}} {\end{mmntTable}} \newenvironment{symbolkeytableIIIL} [2] {\begin{mmnttableL} {|l|l|l|} {Symbol & Keyword & Type} {#1}{#2}} {\end{mmnttableL}} \newenvironment{symbolkeytableIV} [1] {\begin{mmnttable} {|l|l|l|l|} {Symbol & Keyword & Type & Unit} {#1}} {\end{mmnttable}} \newenvironment{symbolkeyTableIV} [1] {\begin{mmntTable} {|l|l|l|l|} {Symbol & Keyword & Type & Unit} {#1}} {\end{mmntTable}} \newenvironment{symbolkeytableIVL} [2] {\begin{mmnttableL} {|l|l|l|l|} {Symbol & Keyword & Type & Unit} {#1}{#2}} {\end{mmnttableL}} \newenvironment{symbolkeytableV} [1] {\begin{mmnttable} {|l|l|l|} {Symbol & Keyword & Type} {#1}} {\end{mmnttable}} \newenvironment{valuekeytableIII} [1] {\begin{mmnttable} {|l|l|l|} {Keyword & Type & Values} {#1}} {\end{mmnttable}} \newenvironment{valuekeytableIIIL} [2] {\begin{mmnttableL} {|l|l|l|} {Keyword & Type & Values} {#1}{#2}} {\end{mmnttableL}} \newcommand{\gcc} {GCC} % File name \newcommand{\file}[1] {\stdin{#1}} \newcommand{\fileI}[1] {\file{#1}\index{#1}} \newcommand{\fileIB}[1] {{\file{#1}}{\index{#1|bold}}} % Some frequently used program names \newcommand{\progname}[1]{\textsc{#1}} \newcommand{\alib}{\progname{Algorithm Library}} \newcommand{\cgg}{\progname{Cgg}} \newcommand{\cpp}{\progname{C++}} \newcommand{\C}{\progname{c}} \newcommand{\tcl} {\progname{Tcl}} \newcommand{\perl} {\progname{Perl}} \newcommand{\ANSI}{\progname{Ansi}} \newcommand{\ansic}{\progname{Ansi~c}} \newcommand{\ansicpp}{\progname{nsi~\cpp}} 
\newcommand{\posix}{\progname{Posix}} \newcommand{\STL}{\progname{Stl}} \newcommand{\emacs}{\progname{Emacs}} \newcommand{\geo}{\progname{Geo2ps}} \newcommand{\geotops}{\progname{Geo2ps}} \newcommand{\inp}{\progname{Inp}} \newcommand{\make}{\progname{Make}} \newcommand{\makedevice}{\progname{makedevice}} \newcommand{\MDL}{\progname{Model Definition Language}} \newcommand{\mdl}{\progname{Mdl}} \newcommand{\mkdev}{\progname{Makedevice}} \newcommand{\python}{\progname{Python}} \newcommand{\xmgrace}{\progname{Xmgrace}} \newcommand{\xcrv}{\progname{Xcrv}} \newcommand{\minimos}{\progname{Minimos~6}} \newcommand{\mmnt}{\progname{Minimos-NT}} \newcommand{\mdlprog}{\mmnt} \newcommand{\mmtont}{\progname{Mm62nt}} \newcommand{\pbf}{\progname{Pbf}} \newcommand{\pbfm}{\progname{Pbfm}} \newcommand{\pif}{\progname{Pif}} \newcommand{\pifcopy}{\progname{Pifcopy}} \newcommand{\pifrm}{\progname{Pifrm}} \newcommand{\pifmaid}{\progname{Pifmaid}} \newcommand{\prost}{\progname{Prost2d}} \newcommand{\punch}{\progname{Punch}} \newcommand{\rul}{\progname{Rul}} \newcommand{\Siesta}{\progname{Siesta}} \newcommand{\sketch}{\progname{Sketch}} \newcommand{\spice}{\progname{Spice}} \newcommand{\splitseg}{\progname{Splitseg}} \newcommand{\svg}{\progname{Svg}} \newcommand{\svgtops}{\progname{Svg2ps}} \newcommand{\tif}{\progname{Tif}} \newcommand{\str}{\progname{Str}} \newcommand{\devedit}{\progname{Devedit}} \newcommand{\silvaco}{\progname{Silvaco}} \newcommand{\iseToPif}{\progname{Ise2pif}} \newcommand{\synopsys}{\progname{Synopsys}} \newcommand{\tifwrap}{\progname{Tifwrap}} \newcommand{\tiftopif}{\progname{Tif2pbf.sh}} \newcommand{\tsuprem}{\progname{Tsuprem4}} \newcommand{\athena}{\progname{Athena}} \newcommand{\xpif}{\progname{Xpif2d}} \newcommand{\xsvg}{\progname{Xsvg}} \newcommand{\vmake}{\progname{Vmake}} \newcommand{\eas}{\progname{Eas}} \newcommand{\pai}{\progname{Pai}} \newcommand{\PAI}{\progname{Pai}} \newcommand{\vista}{\progname{Vista}} \newcommand{\PBF}{\progname{Pbf}} 
\newcommand{\PBL}{\progname{Pbl}} \newcommand{\PLB}{\progname{Plb}} \newcommand{\PAL}{\progname{Pal}} \newcommand{\FORTRAN}{\progname{Fortran}} \newcommand{\LISP}{\progname{Lisp}} \newcommand{\PIL}{\progname{Pil}} \newcommand{\PCL}{\progname{Pcl}} \newcommand{\PFL}{\progname{Pfl}} \newcommand{\UNFUG}{\progname{Unfug}} \newcommand{\DIOS}{\progname{Dios}} \newcommand{\bison}{\progname{Bison}} \newcommand{\flex}{\progname{Flex}} \newcommand{\ipl}{ViennaIPD} \newcommand{\inputdeck} {ViennaIPD} \newcommand{\Inputdeck} {ViennaIPD} %\newcommand{\inputdeck} {input-deck} % word within a paragraph %\newcommand{\Inputdeck} {Input-deck} % at the beginning of a paragraph %\newcommand{\InputDeck} {Input-Deck} % in headings \newcommand{\curve} {\sc{Curve}} % simulation modes \newcommand{\sm}{\sc{Single-Mode}} \newcommand{\mm}{\sc{Mixed-Mode}} \newcommand{\MM}{\sc{Mixed-Mode}} \newcommand{\DC}{\sc{dc}} \newcommand{\AC}{\sc{ac}} \newcommand{\DD}{\sc{dd}} \newcommand{\HD}{\sc{hd}} % Name of a person (e.g. 
Maxwell) \newcommand{\persname}[1]{\textsl{#1}} \newcommand{\srh}{\persname{Shockley-Read-Hall}} \newcommand{\key}[1]{\texttt{<#1>}} \newcommand{\menu}[1]{\textsf{#1}} \newcommand{\window}[1]{\textsl{\textsf{#1}}} % Differential Operators: grad, div, rot, error function \newcommand{\GRAD}{\mathrm{grad}} \newcommand{\DIV}{\mathrm{div}} \newcommand{\ROT}{\mathrm{rot}} \newcommand{\erfc}{\mathrm{erfc}} % Partial Derivative \newcommand{\PD}[2]{\frac{\partial #1}{\partial #2}} % Total Derivative \newcommand{\TD}[2]{\frac{{\mathrm{d}} #1}{{\mathrm{d}} #2}} % References to Equations, Tables, Figures, Sections \newcommand{\Eq}[1]{(\ref{#1})} \newcommand{\Fig}[1]{Fig.~\ref{#1}} \newcommand{\Sec}[1]{Section~\ref{#1}} \newcommand{\Chapter}[1]{Chapter~\ref{#1}} \newcommand{\Appendix}[1]{Appendix~\ref{#1}} \newcommand{\Table}[1]{Table~\ref{#1}} % Vector \newcommand{\vect}[1]{\mathbf{#1}} % Bernoulli function \newcommand{\bern}{\mathrm{B}} % average D \newcommand{\dav}{\overline{D}} \newcommand{\ToKmO}{\left(\frac{\TL}{\mathrm{300\ K}} - 1\right)} \newcommand{\ToK}{\left(\frac{\TL}{\mathrm{300\ K}}\right)} \newcommand{\LToK}{\left(\frac{\TL}{\mathrm{77\ K}}\right)} \newcommand{\LToKTH}{\left(\frac{\TL}{\mathrm{300\ K}}\right)} \newcommand{\TK}{\left(\frac{T}{K}\right)} \newcommand{\KoT}{\left(\frac{\mathrm{300\ K}}{\TL}\right)} \newcommand{\KoLT}{\left(\frac{\mathrm{77\ K}}{\TL}\right)} \newcommand{\dop}[2] {\ensuremath{#1\times 10^{#2} \, \textrm{cm}^{-3}}} \newcommand{\dopOne}[1] {\ensuremath{10^{#1} \, \textrm{cm}^{-3}}} \newcommand{\kL} {\ensuremath{\kappa_\mathrm{L}}} \newcommand{\nrg} {\mathcal{E}} \newcommand{\lth} {\lambda_\mathrm{TH}} \newcommand{\hc} {h_\mathrm{corr}} \newcommand{\Qi} {Q_\mathrm{i}} \newcommand{\fB} {f_\mathrm{B}} \newcommand{\kdiel} {\ensuremath{\kappa_\mathrm{diel}}} \newcommand{\tdiel} {\ensuremath{t_\mathrm{diel}}} \newcommand{\mdiel} {\ensuremath{m_\mathrm{diel}}} \newcommand{\mOx} {\ensuremath{m_\mathrm{ox}}} \newcommand{\meff} 
{\ensuremath{m_\mathrm{eff}}} \newcommand{\kme} {\ensuremath{\kappa_\mathrm{metal}}} \newcommand{\ksi} {\ensuremath{\kappa_\mathrm{si}}} \newcommand{\epsr}[1]{\varepsilon_{\mathrm{r}}^{\mathrm{#1}}} \newcommand{\q}{\mathrm{q}} \newcommand{\kB}{\mathrm{k_B}} \newcommand{\Ed} {\nrg_\mathrm{d}} \newcommand{\Ea} {\nrg_\mathrm{a}} \newcommand{\ED} {\nrg_\mathrm{D}} \newcommand{\EA} {\nrg_\mathrm{A}} \newcommand{\Ec} {\nrg_\mathrm{C}} \newcommand{\Eci}[1]{\nrg_\mathrm{C#1}} \newcommand{\Ev} {\nrg_\mathrm{V}} \newcommand{\Evi}[1]{\nrg_\mathrm{V#1}} \newcommand{\ET} {\nrg_{\mathrm{T}}} \newcommand{\rt} {r_{\mathrm{t}}} \newcommand{\ft} {\ensuremath{f_{\mathrm{t}}}} \newcommand{\Qt} {\ensuremath{Q_{\mathrm{t}}}} \newcommand{\Ei} {\nrg_{\mathrm{i}}} \newcommand{\Eref} {\ensuremath{\nrg_{\mathrm{ref}}}} \newcommand{\Ethr} {\nrg_{\mathrm{thr}}} \newcommand{\Eg} {\nrg_{\mathrm{g}}} \newcommand{\Egz} {\nrg_{\mathrm{g,0}}} \newcommand{\Eoff} {\nrg_{\mathrm{off}}} \newcommand{\GnII}{G_{n}^{\mathrm{II}}} \newcommand{\GpII}{G_{p}^{\mathrm{II}}} \newcommand{\NA}{N_\mathrm{A}} \newcommand{\ND}{N_\mathrm{D}} \newcommand{\Nc}{N_\mathrm{C}} \newcommand{\Nv}{N_\mathrm{V}} \newcommand{\Ncv}{N_\mathrm{C,V}} \newcommand{\Ncz}{N_\mathrm{C,0}} \newcommand{\Nvz}{N_\mathrm{V,0}} \newcommand{\Ni}{N_{\mathrm{i}}} \newcommand{\NT}{N_{\mathrm{T}}} \newcommand{\Ntot}{N_{\mathrm{tot}}} \newcommand{\TC}{T_{\mathrm{C}}} \newcommand{\RDIR}{R^{\mathrm{DIR}}} \newcommand{\RAU}{R^{\mathrm{AU}}} \newcommand{\RBB}{R^{\mathrm{BB}}} \newcommand{\RII}{R^{\mathrm{II}}} \newcommand{\RSRH}{R^{\mathrm{SRH}}} \newcommand{\RT}{R_{\mathrm{T}}} \newcommand{\TL}{T_\mathrm{L}} \newcommand{\TLo}{T_{L,\mathrm{old}}} \newcommand{\tauen}{\tau_{\epsilon,n}} \newcommand{\tauep}{\tau_{\epsilon,p}} \newcommand{\taue}[1]{\tau_{\epsilon,#1}} \newcommand{\TCoK}{\left(\frac{T_n}{\mathrm{300\ K}}+C_0\right)} \newcommand{\PhiB} {\ensuremath{\Phi_\mathrm{B}}} \newcommand{\Phit} {\ensuremath{\Phi_\mathrm{t}}} \newcommand{\deriv} 
{\ensuremath {\mathrm{d}}} \newcommand{\Schro} {\persname{Schr{\"o}\-dinger}} \newcommand{\Emin} {\ensuremath{\nrg_\mathrm{min}}} \newcommand{\Emax} {\ensuremath{\nrg_\mathrm{max}}} \newcommand{\Ef} {\ensuremath{\nrg_\mathrm{F}}} \newcommand{\Efi} {\ensuremath{\nrg_{\mathrm{F},i}}} \newcommand{\ef}[1] {\ensuremath{\nrg_\mathrm{F#1}}} \newcommand{\VEold}{\vect{E}_{\mathrm{old}}} \newcommand{\VEoldi}[1]{\vect{E}_{\mathrm{old} #1}} \newcommand{\VPold}{\vect{P}_{\mathrm{old}}} \newcommand{\VPoldi}[1]{\vect{P}_{\mathrm{old} #1}} \newcommand{\VP}[1]{\vect{P}_{#1}} \newcommand{\Vg} {\ensuremath{V_\mathrm{G}}} \newcommand{\Iin} {\ensuremath{I_\mathrm{in}}} \newcommand{\Iout} {\ensuremath{I_\mathrm{out}}} \newcommand{\Vin} {\ensuremath{V_\mathrm{in}}} \newcommand{\Vout} {\ensuremath{V_\mathrm{out}}} \newcommand{\Vox} {\ensuremath{V_\mathrm{ox}}} \newcommand{\VDS} {\ensuremath{V_\mathrm{DS}}} \newcommand{\Id} {\ensuremath{I_\mathrm{d}}} \newcommand{\Vd} {\ensuremath{V_\mathrm{d}}} \newcommand{\Is} {\ensuremath{I_\mathrm{s}}} \newcommand{\te} {\ensuremath{\tau_\mathrm{e}}} \newcommand{\tc} {\ensuremath{\tau_\mathrm{c}}} \newcommand{\Nt} {\ensuremath{N_\mathrm{t}}} \newcommand{\Et} {\ensuremath{\nrg_\mathrm{t}}} \newcommand{\tox}{\ensuremath{t_\mathrm{ox}}} \newcommand{\Di} {\ensuremath{D_\mathrm{i}}} % install screen-shots %% \newcommand{\leftpic}[3]{% %% \parpic[l]{% %% \psblurbox[linecolor=white,linewidth=0,framesep=-0.04,shadowsize=0.2cm,blurradius=0.1cm]{% %% \includegraphics[width=#1]{#2}} } #3 %% \picskip{0} %% \bigskip %% } %% \newcommand{\rightpic}[3]{% %% \parpic[r]{% %% \psblurbox[linecolor=white,linewidth=0,framesep=-0.04,shadowsize=0.2cm,blurradius=0.1cm]{% %% \includegraphics[width=#1]{#2}} } #3 %% \picskip{0} %% \bigskip %% } \newcommand{\leftpic}[3]{% \begin{minipage}{0.48\linewidth} \includegraphics[width=\linewidth]{#2} \end{minipage} \hfill \begin{minipage}{0.48\linewidth} #3 \end{minipage} } \newcommand{\rightpic}[3]{% \begin{minipage}{0.48\linewidth} #3 
\end{minipage} \hfill \begin{minipage}{0.48\linewidth} \includegraphics[width=\linewidth]{#2} \end{minipage} } % makedevice \newcommand{\SiO}{SiO$_2$} \newcommand{\SiN}{Si$_3$N$_4$} % abbreviations \newcommand{\AIX} {\sc Aix} \newcommand{\ASCII} {\sc Ascii} \newcommand{\BICGSTAB} {\sc Bicg-stab} \newcommand{\BJT} {\sc Bjt} \newcommand{\case} {\sc Case} \newcommand{\CNT} {\sc Cnt} \newcommand{\cpu} {\sc Cpu} \newcommand{\EEPROM} {\sc Eeprom} \newcommand{\EKV} {\sc Ekv} \newcommand{\FET} {\sc Fet} \newcommand{\GMRES} {{\sc Gmres}(m)} \newcommand{\HBT} {\sc Hbt} \newcommand{\HEMT} {\sc Hemt} \newcommand{\IBM} {\sc Ibm} \newcommand{\ILU} {\sc Ilu} \newcommand{\ILUFAC} {Incomplete-{\LU} factorization} \newcommand{\IUE} {Institute for Microelectronics} \newcommand{\LU} {\sc Lu} \newcommand{\MOS} {\sc Mos} \newcommand{\MOSFET} {\sc Mosfet} \newcommand{\NMOS} {\sc Nmos} \newcommand{\OSF} {\sc Osf} \newcommand{\PMOS} {\sc Pmos} \newcommand{\SOI} {{\sc Soi}} \newcommand{\SOIFET} {\sc Soifet} \newcommand{\TBB} {\sc Tbb} \newcommand{\TUV} {Technical University Vienna} \newcommand{\mydollar} {\$} %$ \newcommand{\mytilde} {{\ensuremath{{\sim}}}} \newcommand{\MISSING}[1]{${}^*$\marginpar{${}^*$\textbf{Missing {#1}}}} \newcommand{\NI}{${}^*$\marginpar{${}^*$\textbf{Not implemented!}}} \newcommand{\BUG}{\marginpar{\includegraphics[width=1cm]{figures/bug.ps}}} % Mobility functions \newcommand{\mob}[1]{\mu^{\mathrm{#1}}} \newcommand{\con}[1]{C^{\mathrm{#1}}} \newcommand{\vs}{v^{\mathrm{sat}}} \newcommand{\phis}[1]{\varphi_{\mathrm{s#1}}} \newcommand{\psis} {\varphi_{\mathrm{s}}} \newcommand{\psim} {\varphi_{\mathrm{m}}} \newcommand{\psibi}{\psi_{\mathrm{bi}}} \newcommand{\psiwf}{\varphi_{\mathrm{wf}}} \newcommand{\psiox}{\varphi_{\mathrm{ins}}} \newcommand{\psiqfl}{\varphi_{\mathrm{qfl}}} \newcommand{\Ewf}{\nrg_{\mathrm{w}}} \newcommand{\eps}[1]{\varepsilon_{\mathrm{#1}}} \newcommand{\mL}{{\mathrm{L}}} \newcommand{\qf} {quasi-\persname{Fermi}} % is not coupled to index entries 
\newcommand{\BB} {\sc bb} \newcommand{\BGN} {\sc Bgn} \newcommand{\BNF} {\sc Bnf} \newcommand{\CQFL} {\sc Cqfl} \newcommand{\QTBM} {\sc Qtbm} \newcommand{\WKB} {\sc Wkb} \newcommand{\TFE} {\sc Tfe} \newcommand{\TE} {\sc Te} \newcommand{\SI} {\sc si} \newcommand{\Nit} {\ensuremath{N_\mathrm{it}}} \newcommand{\Nz} {\ensuremath{N_0}} \newcommand{\kf} {\ensuremath{k_\mathrm{f}}} \newcommand{\kfz} {\ensuremath{k_\mathrm{f,0}}} \newcommand{\kr} {\ensuremath{k_\mathrm{r}}} \newcommand{\NXz} {\ensuremath{N_{X,\mathrm{it}}}} \newcommand{\ps} {\ensuremath{p_\mathrm{s}}} \newcommand{\Eox} {\ensuremath{E_\mathrm{ox}}} \newcommand{\pref} {\ensuremath{p_\mathrm{ref}}} \newcommand{\Qit} {\ensuremath{Q_\mathrm{it}}} \newcommand{\Qitsimple} {\ensuremath{Q_\mathrm{it}^\mathrm{simple}}} \newcommand{\Dt} {\ensuremath{D_\mathrm{t}}} \newcommand{\Qf} {\ensuremath{Q_\mathrm{f}}} \newcommand{\EF} {E_{\mathrm{F}}} \newcommand{\Vth} {V_\mathrm{th}} \newcommand{\Cox} {\ensuremath{C_\mathrm{ox}}} \newcommand{\NX} {\ensuremath{N_X}} \newcommand{\DX} {\ensuremath{D_X}} \newcommand{\DXz} {\ensuremath{D_{X,0}}} \newcommand{\PhiX} {\ensuremath{\Phi_X}} ViennaCL-1.5.1-src/doc/manual/operations.tex000644 001750 001750 00000027727 12255634117 020734 0ustar00rupprupp000000 000000 \chapter{Basic Operations} \label{chap:operations} The basic types have been introduced in the previous chapter, so we move on with the description of the basic BLAS operations. Almost all operations supported by {\ublas} are available, including element-wise operations on vectors. Thus, consider the \href{http://www.boost.org/doc/libs/1_52_0/libs/numeric/ublas/doc/operations_overview.htm}{ublas-documentation} as a reference as well. \section{Vector-Vector and Elementary Matrix-Matrix Operations (BLAS Level 1)} {\ViennaCL} provides all vector-vector operations defined at level 1 of BLAS. Tab.~\ref{tab:blas-level-1} shows how these operations can be carried out in \ViennaCL. 
The function interface is compatible with {\ublas}, thus allowing quick code migration for {\ublas} users. Element-wise operations and standard operator overloads are available for dense matrices as well. The only dense matrix norm provided is \lstinline|norm_frobenius()| for the Frobenius norm. \TIP{For full details on level 1 functions, refer to the reference documentation located in \texttt{doc/doxygen/}} \NOTE{Mixing operations between objects of different scalar types is not supported. Convert the data manually on the host if needed.} \begin{table}[tb] \begin{center} \begin{tabular}{l|l|p{6cm}} Verbal & Mathematics & ViennaCL\\ \hline swap & $x \leftrightarrow y$ & \lstinline|swap(x,y);| \\ stretch & $x \leftarrow \alpha x$ & \lstinline|x *= alpha;| \\ assignment & $y \leftarrow x$ & \lstinline|y = x;| \\ multiply add & $y \leftarrow \alpha x + y$ & \lstinline|y += alpha * x;| \\ multiply subtract & $y \leftarrow y - \alpha x$ & \lstinline|y -= alpha * x;| \\ inner dot product & $\alpha \leftarrow x^{\mathrm{T}} y$ & \lstinline|inner_prod(x,y);| \\ $L^1$ norm & $\alpha \leftarrow \Vert x \Vert_1$ & \lstinline|alpha = norm_1(x);| \\ $L^2$ norm & $\alpha \leftarrow \Vert x \Vert_2$ & \lstinline|alpha = norm_2(x);| \\ $L^\infty$ norm & $\alpha \leftarrow \Vert x \Vert_\infty$ & \lstinline|alpha = norm_inf(x);| \\ $L^\infty$ norm index& $i \leftarrow \mathrm{arg\,max}_i \vert x_i \vert$ & \lstinline|i = index_norm_inf(x);| \\ plane rotation & $(x,y) \leftarrow (\alpha x + \beta y, -\beta x + \alpha y)$ & \lstinline|plane_rotation(a, b, x, y);| \\ \hline elementwise product & $y_i \leftarrow x_i \cdot z_i$ & \lstinline|y = element_prod(x,z);| \\ elementwise division & $y_i \leftarrow x_i / z_i$ & \lstinline|y = element_div(x,z);| \\ elementwise power & $y_i \leftarrow x_i^{z_i}$ & \lstinline|y = element_pow(x,z);| \\ \hline elementwise modulus (ints) & $y_i \leftarrow |x_i|$ & \lstinline|y = element_abs(x);| \\ elementwise modulus (floats) & $y_i \leftarrow |x_i|$ &
\lstinline|y = element_fabs(x);| \\ elementwise acos & $y_i \leftarrow \textrm{acos}(x_i)$ & \lstinline|y = element_acos(x);| \\ elementwise asin & $y_i \leftarrow \textrm{asin}(x_i)$ & \lstinline|y = element_asin(x);| \\ elementwise atan & $y_i \leftarrow \textrm{atan}(x_i)$ & \lstinline|y = element_atan(x);| \\ elementwise ceil & $y_i \leftarrow \lceil x_i \rceil$ & \lstinline|y = element_ceil(x);| \\ elementwise cos & $y_i \leftarrow \textrm{cos}(x_i)$ & \lstinline|y = element_cos(x);| \\ elementwise cosh & $y_i \leftarrow \textrm{cosh}(x_i)$ & \lstinline|y = element_cosh(x);| \\ elementwise exp & $y_i \leftarrow \textrm{exp}(x_i)$ & \lstinline|y = element_exp(x);| \\ elementwise floor & $y_i \leftarrow \lfloor x_i \rfloor $ & \lstinline|y = element_floor(x);| \\ elementwise log (base e) & $y_i \leftarrow \textrm{ln}(x_i)$ & \lstinline|y = element_log(x);| \\ elementwise log (base 10) & $y_i \leftarrow \textrm{log}_{10}(x_i)$ & \lstinline|y = element_log10(x);| \\ elementwise sin & $y_i \leftarrow \textrm{sin}(x_i)$ & \lstinline|y = element_sin(x);| \\ elementwise sinh & $y_i \leftarrow \textrm{sinh}(x_i)$ & \lstinline|y = element_sinh(x);| \\ elementwise sqrt & $y_i \leftarrow \textrm{sqrt}(x_i)$ & \lstinline|y = element_sqrt(x);| \\ elementwise tan & $y_i \leftarrow \textrm{tan}(x_i)$ & \lstinline|y = element_tan(x);| \\ elementwise tanh & $y_i \leftarrow \textrm{tanh}(x_i)$ & \lstinline|y = element_tanh(x);| \\ \end{tabular} \caption{BLAS level 1 routines mapped to {\ViennaCL}. Note that the free functions reside in namespace \texttt{viennacl::linalg}} \label{tab:blas-level-1} \end{center} \end{table} \section{Matrix-Vector Operations (BLAS Level 2)} The interface for level 2 BLAS functions in {\ViennaCL} is similar to that of {\ublas} and shown in Tab.~\ref{tab:blas-level-2}. 
\TIP{For full details on level 2 functions, refer to the reference documentation located in \texttt{doc/doxygen/}} \NOTE{Mixing operations between objects of different scalar types is not supported. Convert the data manually on the host if needed.} \begin{table}[tb] \begin{center} \renewcommand{\arraystretch}{1.2} \begin{tabular}{p{4cm}|l|p{7cm}} Verbal & Mathematics & ViennaCL\\ \hline matrix vector product & $y \leftarrow A x$ & \lstinline|y = prod(A, x);| \\ matrix vector product & $y \leftarrow A^\mathrm{T} x$ & \lstinline|y = prod(trans(A), x);| \\ inplace mv product & $x \leftarrow A x$ & \lstinline|x = prod(A, x);| \\ inplace mv product & $x \leftarrow A^\mathrm{T} x$ & \lstinline|x = prod(trans(A), x);| \\ \hline scaled product add & $y \leftarrow \alpha A x + \beta y$ & \lstinline|y = alpha * prod(A, x) + beta * y| \\ scaled product add & $y \leftarrow \alpha A^{\mathrm T} x + \beta y$ & \lstinline|y = alpha * prod(trans(A), x) + beta * y| \\ \hline tri. matrix solve & $y \leftarrow A^{-1} x$ & \lstinline|y = solve(A, x, tag);| \\ tri. matrix solve & $y \leftarrow A^\mathrm{T^{-1}} x$ & \lstinline|y = solve(trans(A), x, tag);| \\ inplace solve & $x \leftarrow A^{-1} x$ & \lstinline|inplace_solve(A, x, tag);| \\ inplace solve & $x \leftarrow A^\mathrm{T^{-1}} x$ & \lstinline|inplace_solve(trans(A), x, tag);| \\ \hline rank 1 update & $A \leftarrow \alpha x y^{\mathrm T} + A$ & \lstinline|A += alpha * outer_prod(x,y);| \\ symm. rank 1 update & $A \leftarrow \alpha x x^{\mathrm T} + A$ & \lstinline|A += alpha * outer_prod(x,x);| \\ rank 2 update & $A \leftarrow \alpha (x y^{\mathrm T} + y x^{\mathrm T}) + A$ & \lstinline|A += alpha * outer_prod(x,y);| \lstinline|A += alpha * outer_prod(y,x);| \\ \end{tabular} \caption{BLAS level 2 routines mapped to \ViennaCL. Note that the free functions reside in namespace \texttt{viennacl::linalg}. 
\lstinline|tag| is one out of \lstinline|lower_tag|, \lstinline|unit_lower_tag|, \lstinline|upper_tag|, and \lstinline|unit_upper_tag|.} \label{tab:blas-level-2} \end{center} \end{table} \section{Matrix-Matrix Operations (BLAS Level 3)} Full BLAS level 3 support is available since {\ViennaCL} 1.1.0, cf.~Tab.~\ref{tab:blas-level-3}. While BLAS levels 1 and 2 are mostly memory-bandwidth-limited, BLAS level 3 is mostly limited by the available computational power of the respective device. Hence, matrix-matrix products regularly show impressive performance gains on mid- to high-end GPUs when compared to a single CPU core. Again, the {\ViennaCL} API is identical to that of {\ublas} and comparisons can be carried out immediately, as is shown in the tutorial located in \texttt{examples/tutorial/blas3.cpp}. As for performance, {\ViennaCL} yields decent performance gains at BLAS level 3 on mid- to high-end GPUs compared to CPU implementations using a single core only. However, highest performance is usually obtained only with careful tuning to the respective target device. Generally, {\ViennaCL} provides kernels that represent a good compromise between efficiency and portability among a large number of different devices and device types. \TIP{ For certain matrix dimensions, typically multiples of 64 or 128, {\ViennaCL} also provides tuned kernels reaching over 1 TFLOP in single precision (AMD HD 7970). } \NOTE{Mixing operations between objects of different scalar types is not supported.
Convert the data manually on the host if needed.} \begin{table}[tb] \begin{center} \renewcommand{\arraystretch}{1.2} \begin{tabular}{p{4cm}|l|p{7.5cm}} Verbal & Mathematics & ViennaCL\\ \hline matrix-matrix product & $C \leftarrow A \times B$ & \lstinline|C = prod(A, B);| \\ matrix-matrix product & $C \leftarrow A \times B^\mathrm{T}$ & \lstinline|C = prod(A, trans(B));| \\ matrix-matrix product & $C \leftarrow A^\mathrm{T} \times B$ & \lstinline|C = prod(trans(A), B);| \\ matrix-matrix product & $C \leftarrow A^\mathrm{T} \times B^\mathrm{T}$ & \lstinline|C = prod(trans(A), trans(B));| \\ \hline tri. matrix solve & $C \leftarrow A^{-1} B$ & \lstinline|C = solve(A, B, tag);| \\ tri. matrix solve & $C \leftarrow A^\mathrm{T^{-1}} B$ & \lstinline|C = solve(trans(A), B, tag);| \\ tri. matrix solve & $C \leftarrow A^{-1} B^\mathrm{T}$ & \lstinline|C = solve(A, trans(B), tag);| \\ tri. matrix solve & $C \leftarrow A^\mathrm{T^{-1}} B^\mathrm{T}$ & \lstinline|C = solve(trans(A), trans(B), tag);| \\ % inplace solve & $B \leftarrow A^{-1} B$ & \lstinline|inplace_solve(A, B, tag);| \\ inplace solve & $B \leftarrow A^\mathrm{T^{-1}} B$ & \lstinline|inplace_solve(trans(A), B, tag);| \\ inplace solve & $B \leftarrow A^{-1} B^\mathrm{T}$ & \lstinline|inplace_solve(A, trans(B), tag);| \\ inplace solve & $B \leftarrow A^\mathrm{T^{-1}} B^\mathrm{T}$ & \lstinline|inplace_solve(trans(A), trans(B), tag);| \\ \end{tabular} \caption{BLAS level 3 routines mapped to \ViennaCL.
Note that the free functions reside in namespace \texttt{viennacl::linalg}} \label{tab:blas-level-3} \end{center} \end{table} \section{Initializer Types} \NOTE{Initializer types in {\ViennaCLversion} can currently only be used for initializing vectors and matrices, not for computations!} In order to initialize vectors, the following initializer types are provided, again similar to {\ublas}: \begin{center} \begin{tabular}{|l|p{10cm}|} \hline \lstinline|unit_vector(s, i)| & Unit vector of size $s$ with entry $1$ at index $i$, zero elsewhere. \\ \hline \lstinline|zero_vector(s)| & Vector of size $s$ with all entries being zero. \\ \hline \lstinline|scalar_vector(s, v)| & Vector of size $s$ with all entries equal to $v$. \\ \hline \lstinline|random_vector(s, d)| & Vector of size $s$ with all entries random according to the distribution specified by $d$. \\ \hline \end{tabular} \end{center} For example, to initialize a vector \lstinline|v1| with all $42$ entries being $42.0$, use \begin{lstlisting} viennacl::vector v1 = viennacl::scalar_vector(42, 42.0f); \end{lstlisting} Similarly the following initializer types are available for matrices: \begin{center} \begin{tabular}{|l|p{10cm}|} \hline \lstinline|identity_matrix(s, i)| & Identity matrix of dimension $s \times s$. \\ \hline \lstinline|zero_matrix(s1, s2)| & Matrix of size $s_1 \times s_2$ with all entries being zero. \\ \hline \lstinline|scalar_matrix(s1, s2, v)| & Matrix of size $s_1 \times s_2$ with all entries equal to $v$. \\ \hline \lstinline|random_matrix(s1, s2, d)| & Matrix of size $s_1 \times s_2$ with all entries random according to the distribution specified by $d$. \\ \hline \end{tabular} \end{center} \section{Row, Column, and Diagonal Extraction} For many algorithms it is of interest to extract a single row or column of a dense matrix, or to access the matrix diagonal.
This is provided in the same way as for Boost.uBLAS through the free functions \lstinline|row()|, \lstinline|column()|, and \lstinline|diag()|: \begin{lstlisting} // A is a viennacl::matrix // Extract 5-th row of A, then overwrite with 6-th diagonal: viennacl::vector r = viennacl::row(A, 4); r = viennacl::row(A, 5); // Extract 4-th column of A, then overwrite with second column: viennacl::vector c = viennacl::column(A, 3); c = viennacl::column(A, 1); // Extract diagonal: viennacl::vector d = viennacl::diag(A); \end{lstlisting} The function \lstinline|diag| can also be used to create a matrix which has the provided vector entries in the off-diagonal: \begin{lstlisting} // Create the matrix // 0 1 0 0 // 0 0 2 0 // 0 0 0 3 viennacl::vector v(3); v[0] = 1.0f; v[1] = 2.0f; v[2] = 3.0f; viennacl::matrix A = viennacl::diag(v, 1); \end{lstlisting} This is similar to MATLAB's \lstinline|diag()| function. ViennaCL-1.5.1-src/doc/manual/cover.tex000644 001750 001750 00000001434 12267304052 017645 0ustar00rupprupp000000 000000 \begin{titlepage} \vspace*{3cm} \Huge{ViennaCL 1.5.1} \rule[0.0cm]{9.5cm}{0.05cm} \begin{flushright} \Large{User Manual} %\Large{Building Instructions}\\ %\Large{Input Specifications} \end{flushright} \vspace{13cm} \rule[0.0cm]{16.0cm}{0.05cm} \begin{figure}[!ht] \vspace{-1.0cm} \centering \begin{minipage}{3cm} \epsfig{file=figures/TU_Signet_CMYK, scale=0.25} \end{minipage} \hfill \hspace{-0.5cm} \begin{minipage}{5.5cm} \vspace{0.5cm} \begin{center} Institute for Microelectronics\newline Gu\ss hausstra\ss e 27-29 / E360\newline A-1040 Wien, Austria\newline \end{center} \end{minipage} \hfill \begin{minipage}{2.6cm} \epsfig{file=figures/logo_px200, scale=1.6} \end{minipage} \end{figure} \end{titlepage} ViennaCL-1.5.1-src/doc/manual/custom-kernels.tex000644 001750 001750 00000012202 12255634117 021502 0ustar00rupprupp000000 000000 \chapter{Custom OpenCL Compute Kernels} \label{chap:custom} For custom algorithms the built-in functionality of {\ViennaCL} may 
not be sufficient or not fast enough. In such cases it can be desirable to write a custom {\OpenCL} compute kernel, which is explained in this chapter. The following steps are necessary and explained one after another: \begin{itemize} \item Write the {\OpenCL} source code \item Compile the compute kernel \item Launch the kernel \end{itemize} A tutorial on this topic can be found at \texttt{examples/tutorial/custom-kernels.cpp}. \section{Setting up the {\OpenCL} Source Code} The {\OpenCL} source code has to be provided as a string. One can either write the source code directly into a string within C++ files, or one can read the {\OpenCL} source from a file. For demonstration purposes, we write the source directly as a string constant: \begin{lstlisting} const char * my_compute_program = "__kernel void elementwise_prod(\n" " __global const float * vec1,\n" " __global const float * vec2, \n" " __global float * result,\n" " unsigned int size) \n" "{ \n" " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n" " result[i] = vec1[i] * vec2[i];\n" "};\n"; \end{lstlisting} The kernel takes three vector arguments \lstinline{vec1}, \lstinline{vec2} and \lstinline{result} and the vector length variable \lstinline{size} and computes the entry-wise product of the vectors \lstinline|vec1| and \lstinline|vec2| and writes the result to the vector \lstinline|result|. For more detailed explanation of the {\OpenCL} source code, please refer to the specification available at the Khronos group webpage \cite{khronoscl}. \section{Compilation of the {\OpenCL} Source Code} The source code in the string constant \lstinline{my_compute_program} has to be compiled to an {\OpenCL} program.
An {\OpenCL} program is a compilation unit and may contain several different compute kernels, so one could also include another kernel function \lstinline{inplace_elementwise_prod} which writes the result directly to one of the two operands \lstinline{vec1} or \lstinline{vec2} in the same program. \begin{lstlisting} viennacl::ocl::program & my_prog = viennacl::ocl::current_context().add_program(my_compute_program, "my_compute_program"); \end{lstlisting} The next step is to extract the kernel object \lstinline|my_kernel| from the compiled program (an explicit kernel registration was needed prior to ViennaCL 1.5.0, but is no longer needed): \begin{lstlisting} viennacl::ocl::kernel & my_kernel = my_prog.get_kernel("elementwise_prod"); \end{lstlisting} Now, the kernel is set up to use the function \lstinline|elementwise_prod| compiled into the program \lstinline|my_prog|. \NOTE{Note that C++ references to kernels and programs may become invalid as other kernels or programs are added. Therefore, first allocate the required {\ViennaCL} objects and compile/add all custom kernels, before you start taking references to custom programs or kernels.} Instead of extracting references to programs and kernels directly at program compilation, one can obtain them at other places within the application source code by \begin{lstlisting} viennacl::ocl::program & my_prog = viennacl::ocl::current_context().get_program("my_compute_program"); viennacl::ocl::kernel & my_kernel = my_prog.get_kernel("elementwise_prod"); \end{lstlisting} This simplifies application development considerably, since no program and kernel objects need to be passed around. \section{Launching the {\OpenCL} Kernel} Before launching the kernel, one may adjust the global and local work sizes (readers not familiar with that are encouraged to read the {\OpenCL} standard \cite{khronoscl}).
The following code specifies a one-dimensional execution model with 16 local workers and 128 global workers: \begin{lstlisting} my_kernel.local_work_size(0, 16); my_kernel.global_work_size(0, 128); \end{lstlisting} In order to use a two-dimensional execution, additionally parameters for the second dimension are set by \begin{lstlisting} my_kernel.local_work_size(1, 16); my_kernel.global_work_size(1, 128); \end{lstlisting} However, for the simple kernel in this example it is not necessary to specify any work sizes at all. The default work sizes (which can be found in \texttt{viennacl/ocl/kernel.hpp}) suffice for most cases. To launch the kernel, the kernel arguments are set in the same way as for ordinary functions. We assume that three {\ViennaCL} vectors \lstinline|vec1|, \lstinline|vec2| and \lstinline|result| have already been set up: \begin{lstlisting} viennacl::ocl::enqueue(my_kernel(vec1, vec2, result, cl_uint(vec1.size()))); \end{lstlisting} Per default, the kernel is enqueued in the first queue of the currently active device. A custom queue can be specified as optional second argument, cf.~the reference documentation located in \texttt{doc/doxygen/}. \TIP{Integer arguments need to be provided using the corresponding OpenCL types \lstinline|cl_int|, \lstinline|cl_uint|, etc. Do not pass arguments of type \lstinline|size_t|, because \lstinline|size_t| might differ on the host and the compute device.}ViennaCL-1.5.1-src/doc/manual/tuning.tex000644 001750 001750 00000005600 12235435247 020040 0ustar00rupprupp000000 000000 \chapter{OpenCL Kernel Parameter Tuning} \label{chap:tuning} The choice of the global and local work sizes for {\OpenCL} kernels typically has a considerable impact on the obtained device performance. The default setting in {\ViennaCL} is -- with some exceptions -- to use the same number of work groups and work items per work group (128) for each compute kernel. 
To obtain highest performance, optimal work sizes have to be determined for each kernel depending on the underlying device. \section{Start Tuning Runs} {\ViennaCLversion} ships with an automated tuning environment, which tries to determine the best kernel parameters for the available device. At present, only kernel parameters for the first device are optimized. The tuning programs are located in \begin{itemize} \item \texttt{examples/parameters/vector.cpp}: Tuning for vector kernels \item \texttt{examples/parameters/matrix.cpp}: Tuning for matrix kernels \item \texttt{examples/parameters/sparse.cpp}: Tuning for sparse matrix kernels \end{itemize} and are built together with other examples when using {\CMake}. The executables are \begin{itemize} \item \texttt{vectorparams}, \item \texttt{matrixparams}, \item \texttt{sparseparams} \end{itemize} respectively and are executed without additional parameters. During execution, these programs create three XML files \texttt{vector\_parameters.xml}, \texttt{matrix\_parameters.xml} and \texttt{sparse\_parameters.xml}, which hold the best parameter set. At present, only {\ViennaCL} types with standard alignment are benchmarked. Higher performance can be obtained when allowing further memory alignments and comparing different implementations. This, however, is not yet available, but may be part of future versions. \section{Load Best Parameters at Startup} In order to load the best parameters at each startup, the parameter reader located at \texttt{viennacl/io/kernel\_parameters.hpp} can be used.
The individual kernels for the respective {\ViennaCL} types can be loaded with the lines \begin{lstlisting} using viennacl::io; read_kernel_parameters< viennacl::vector >("vector_parameters.xml"); read_kernel_parameters< viennacl::matrix >("matrix_parameters.xml"); read_kernel_parameters< viennacl::compressed_matrix >("sparse_parameters.xml"); //similarly for the numeric type double \end{lstlisting} where the filename is as usual relative to current working directory. A simple example doing just that can be found in \texttt{examples/parameters/parameter\_reader.cpp}. In principle, kernel parameters can all be located in a single XML file, from which the call to \lstinline|read_kernel_parameters()| will then extract the relevant ones for the respective {\ViennaCL} type and the available device. \TIP{Please note that in order to read the parameters, the project has to be linked with \texttt{pugixml} \cite{pugixml}, which is shipped with {\ViennaCL} in \texttt{external/} } ViennaCL-1.5.1-src/doc/manual/setup.tex000644 001750 001750 00000006767 12235435247 017713 0ustar00rupprupp000000 000000 % Define margins \voffset=-1in \hoffset=-1in % A4 \headheight=4ex \headsep=2ex \evensidemargin=2.5cm \oddsidemargin=2.5cm %\topmargin=1.3cm \topmargin=0.0cm \marginparwidth=1.5cm \textwidth=16.0cm \textheight=23.9cm \footskip=6ex \parindent=0cm \parskip=1ex \newlength{\mmntboxwidth} \setlength{\mmntboxwidth}{\textwidth} \addtolength{\mmntboxwidth}{-2mm} \setcounter{secnumdepth}{4} \setcounter{tocdepth}{2} \shortindexingoff %\newindex{idxkey}{idxkey}{indkey}{Index of Keywords} %\newindex{idxipd}{idxipd}{indipd}{Index of Material Parameters} \makeindex \newcommand{\auxindex}[1]{\index{auxiliary tools!#1@\texttt{#1}}% \index{#1@\texttt{#1}}} \newlength{\NoteBoxWidth} \setlength{\NoteBoxWidth}{14.5cm}%{15.18cm} \newrgbcolor{notebg} {0.99609375 0.8828125 0.921875} % PSTricks colors \newrgbcolor{tipbg} {0.98828125 0.99609375 0.78515625} \newrgbcolor{ipdbg} {0.85 0.85 0.93} 
\definecolor{headingfg} {rgb}{0.2,0.2,0.6} \newrgbcolor{headingfg} {0.2 0.2 0.6} % PSTricks colors \newlength{\IpdBoxWidth} \setlength{\IpdBoxWidth}{14.5cm} \newcommand{\verbbaseformat}[2] {\psframebox*[boxsep=false,fillcolor=#1]{ %\parbox{\IpdBoxWidth}{\color{black}#2\raisebox{-1ex}{\rule{0pt}{2.6ex}}}}} \parbox{\IpdBoxWidth}{\color{black}#2\raisebox{-1.5ex}{\rule{0pt}{2.6ex}}}}} \newcommand{\verbformat}[1] {\verbbaseformat{\verbatimbg}{#1}} \newcommand{\verbatimbg} {ipdbg} \renewcommand{\FancyVerbFormatLine}[1]{\verbformat{#1}} \newcommand{\EXA}{} \newcommand{\mmntcaparg}{} \newcommand{\mmntlabarg}{} \newcommand{\stdin}[1]{\texttt{#1}} \newcommand{\unit}[1]{$\mathrm{#1}$} \makeatletter % [JW] check latex faq for : \@ and @ in macro names \newcommand{\mmnttablecaption}[1]{% \par \addcontentsline{lot}{table}{\protect\numberline{thetable}{\ignorespaces #1}} \refstepcounter{table}\@makecaption{\fnum@table}{#1\rule[-2mm]{0mm}{3mm}}} \newcommand{\TIP}[1]{\vspace*{0.4cm} \marginpar[{\vspace{0.1cm}\hspace{ 0.8cm}\includegraphics[width=0.7cm]{figures/tip.eps}}]% {{\vspace{0.1cm}\hspace{-0.2cm}\includegraphics[width=0.7cm]{figures/tip.eps}}}% \hspace{0.5cm}\psframebox*[fillcolor=tipbg]{\parbox{\NoteBoxWidth}{#1}}\vspace*{0.4cm}} \newcommand{\NOTE}[1]{\vspace*{0.4cm} \marginpar[{\vspace{0.15cm}\hspace{ 0.8cm}\includegraphics[width=0.7cm]{figures/note.eps}}]% {{\vspace{0.15cm}\hspace{-0.2cm}\includegraphics[width=0.7cm]{figures/note.eps}}}% \hspace{0.5cm}\psframebox*[fillcolor=notebg]{\parbox{\NoteBoxWidth}{#1}}\vspace*{0.4cm}} \newcommand{\listitem} {\psframe[fillstyle=gradient,gradbegin=headingfg,gradend=headingfg,gradmidpoint=1,linestyle=none](-0.15,0.075)(0.0,0.225)} \newenvironment{mmnttable}[3] {\renewcommand{\mmntcaparg}{#3}\par\vspace{3mm}\begin{minipage}{\textwidth} \begin{center}\par\begin{tabular}{#1} \hline #2 \\ \hline} {\hline \end{tabular}\mmnttablecaption{\mmntcaparg}\end{center}\end{minipage}} \makeatother \newenvironment{exaipd} { 
%\begin{list}{}{\leftmargin6mm} \scriptsize\item[]\EXA } \begin{list}{}{\leftmargin6mm} \scriptsize\item[]\EXA } {\end{list}} \newenvironment{mmnttableL}[4] {\renewcommand{\mmntcaparg}{#3}\renewcommand{\mmntlabarg}{#4}\par \vspace{3mm}\begin{minipage}{\textwidth}\begin{center}\par\begin{tabular}{#1} \hline #2 \\ \hline} {\hline \end{tabular}\mmnttablecaption{\mmntcaparg}\label{\mmntlabarg} \end{center}\end{minipage}} \newenvironment{mmntlist} {\par\vspace*{-0.1cm}% \begin{list}{\listitem}% {\setlength{\itemsep}{0.001cm}}% }% {\end{list}\vspace*{-0.1cm}} ViennaCL-1.5.1-src/doc/manual/versioning.tex000644 001750 001750 00000003346 12235435247 020724 0ustar00rupprupp000000 000000 \chapter{Versioning}% \addcontentsline{toc}{chapter}{Versioning} Each release of {\ViennaCL} carries a three-fold version number, given by\\ \begin{center} \texttt{ViennaCL X.Y.Z} . \\ \end{center} For users migrating from an older release of {\ViennaCL} to a new one, the following guidelines apply: \begin{itemize} \item \texttt{X} is the \emph{major version number}, starting with \texttt{1}. A change in the major version number is not necessarily API-compatible with any versions of ViennaCL carrying a different major version number. In particular, end users of {\ViennaCL} have to expect considerable code changes when changing between different major versions of {\ViennaCL}. \item \texttt{Y} denotes the \emph{minor version number}, restarting with zero whenever the major version number changes. The minor version number is incremented whenever significant functionality is added to {\ViennaCL}. The API of an older release of {\ViennaCL} with smaller minor version number (but same major version number) is \emph{essentially} compatible to the new version, hence end users of {\ViennaCL} usually do not have to alter their application code, unless they have used a certain functionality that was not intended to be used and removed in the new version. \item \texttt{Z} is the \emph{revision number}. 
If either the major or the minor version number changes, the revision number is reset to zero. Releases of {\ViennaCL}, that only differ in their revision number, are API compatible. Typically, the revision number is increased whenever bugfixes are applied, compute kernels are improved or some extra, not significant functionality is added. \end{itemize} \TIP{Always try to use the latest version of {\ViennaCL} before submitting bug reports!} ViennaCL-1.5.1-src/doc/manual/other-libs.tex000644 001750 001750 00000013547 12235435247 020615 0ustar00rupprupp000000 000000 \chapter{Interfaces to Other Libraries} \label{chap:other-libs} {\ViennaCL} aims at compatibility with as many other libraries as possible. This is on the one hand achieved by using generic implementations of the individual algorithms, and on the other hand by providing the necessary wrappers. The interfaces to third-party libraries provided with {\ViennaCL} are explained in the following subsections. Please feel free to suggest additional libraries for which an interface should be shipped with {\ViennaCL}. Since it is unlikely that all third-party libraries for which {\ViennaCL} provides interfaces are installed on the target machine, the wrappers are disabled by default. To selectively enable the wrappers, the appropriate preprocessor constants \lstinline|VIENNACL_WITH_XXXX| have to be defined \emph{prior to any \lstinline|\#include| statements for {\ViennaCL} headers}. This can for example be assured by passing the preprocessor constant directly when launching the compiler. With \lstinline|GCC| this is for instance achieved by the \lstinline|-D| switch. \section{Boost.uBLAS} Since all types in {\ViennaCL} have the same interface as their counterparts in {\ublas}, most code written for {\ViennaCL} objects remains valid when using {\ublas} objects. 
\begin{lstlisting} //Option 1: Using ViennaCL: using namespace viennacl; using namespace viennacl::linalg; //Option 2: Using ublas: //using namespace boost::numeric::ublas; matrix dense_matrix(5,5); vector dense_vector(5,5); compressed_matrix sparse_matrix(1000, 1000); //fill with data: dense_matrix(0,0) = 2.0; .... //run solvers vector result1 = solve(dense_matrix, dense_vector, upper_tag()); vector result2 = viennacl::linalg::solve(sparse_matrix, dense_vector, cg_tag()); \end{lstlisting} The above code is valid for either the {\ViennaCL} namespace declarations, or the {\ublas} namespace. Note that the iterative solvers are not part of {\ublas} and therefore the explicit namespace specification is required. More examples for the exchangeability of {\ublas} and {\ViennaCL} can be found in the tutorials in the \texttt{examples/tutorial/} folder. When using the iterative solvers, the preprocessor constant \texttt{VIENNACL\_WITH\_UBLAS} must be defined prior to any other {\ViennaCL} include statements. This is essential for enabling the respective wrappers. \TIP{Refer in particular to \texttt{iterative-ublas.cpp} for a complete example on iterative solvers using {\ublas} types.} \section{\Eigen} To copy data from {\Eigen} \cite{eigen} objects (version 3.x.y) to {\ViennaCL}, the \texttt{copy()}-functions are used just as for {\ublas} and STL types: \begin{lstlisting} //from Eigen to ViennaCL viennacl::copy(eigen_vector, vcl_vector); viennacl::copy(eigen_densematrix, vcl_densematrix); viennacl::copy(eigen_sparsematrix, vcl_sparsematrix); \end{lstlisting} In addition, the STL-compliant iterator-version of \texttt{viennacl::copy()} taking three arguments can be used for copying vector data. Here, all types prefixed with \texttt{eigen} are {\Eigen} types, the prefix \texttt{vcl} indicates {\ViennaCL} objects.
Similarly, the transfer from {\ViennaCL} back to {\Eigen} is accomplished by \begin{lstlisting} //from ViennaCL to Eigen viennacl::copy(vcl_vector, eigen_vector); viennacl::copy(vcl_densematrix, eigen_densematrix); viennacl::copy(vcl_sparsematrix, eigen_sparsematrix); \end{lstlisting} The iterative solvers in {\ViennaCL} can also be used directly with {\Eigen} objects: \begin{lstlisting} using namespace viennacl::linalg; //for brevity of the following lines eigen_result = solve(eigen_matrix, eigen_rhs, cg_tag()); eigen_result = solve(eigen_matrix, eigen_rhs, bicgstab_tag()); eigen_result = solve(eigen_matrix, eigen_rhs, gmres_tag()); \end{lstlisting} When using the iterative solvers with {\Eigen}, the preprocessor constant \texttt{VIENNACL\_WITH\_EIGEN} must be defined prior to any other {\ViennaCL} include statements. This is essential for enabling the respective wrappers. \TIP{Refer to \texttt{iterative-eigen.cpp} and \texttt{eigen-with-viennacl.cpp} for complete examples.} \section{MTL 4} The following lines demonstrate how {\ViennaCL} types are filled with data from {\MTL} \cite{mtl4} objects: \begin{lstlisting} //from MTL4 to ViennaCL viennacl::copy(mtl4_vector, vcl_vector); viennacl::copy(mtl4_densematrix, vcl_densematrix); viennacl::copy(mtl4_sparsematrix, vcl_sparsematrix); \end{lstlisting} In addition, the STL-compliant iterator-version of \texttt{viennacl::copy()} taking three arguments can be used for copying vector data. Here, all types prefixed with \texttt{mtl4} are {\MTL} types, the prefix \texttt{vcl} indicates {\ViennaCL} objects.
Similarly, the transfer from {\ViennaCL} back to {\MTL} is accomplished by \begin{lstlisting} //from ViennaCL to MTL4 viennacl::copy(vcl_vector, mtl4_vector); viennacl::copy(vcl_densematrix, mtl4_densematrix); viennacl::copy(vcl_sparsematrix, mtl4_sparsematrix); \end{lstlisting} Even though {\MTL} provides its own set of iterative solvers, the iterative solvers in {\ViennaCL} can also be used: \begin{lstlisting} using namespace viennacl::linalg; //for brevity of the following lines mtl4_result = solve(mtl4_matrix, mtl4_rhs, cg_tag()); mtl4_result = solve(mtl4_matrix, mtl4_rhs, bicgstab_tag()); mtl4_result = solve(mtl4_matrix, mtl4_rhs, gmres_tag()); \end{lstlisting} Our internal tests have shown that the execution time of {\MTL} solvers is equal to {\ViennaCL} solvers when using {\MTL} types. When using the iterative solvers with {\MTL}, the preprocessor constant \texttt{VIENNACL\_WITH\_MTL4} must be defined prior to any other {\ViennaCL} include statements. This is essential for enabling the respective wrappers. 
\TIP{Refer to \texttt{iterative-mtl4.cpp} and \texttt{mtl4-with-viennacl.cpp} for complete examples.} ViennaCL-1.5.1-src/doc/manual/viennacl.bib000644 001750 001750 00000011714 12235435247 020272 0ustar00rupprupp000000 000000 % ----------------------------------------------- @misc{boost, title = {{Boost C++ Libraries}}, URL = {http://www.boost.org/} } % ----------------------------------------------- @misc{openmp, title = {{OpenMP}}, URL = {http://openmp.org} } % ----------------------------------------------- @misc{cmake, title = {{CMake}}, URL = {http://www.cmake.org/} } % ----------------------------------------------- @misc{khronoscl, title = {{Khronos OpenCL}}, URL = {http://www.khronos.org/opencl/} } % ----------------------------------------------- @misc{nvidiacl, title = {{NVIDIA OpenCL}}, URL = {http://www.nvidia.com/object/cuda_opencl_new.html} } % ----------------------------------------------- @misc{nvidiacuda, title = {{NVIDIA CUDA}}, URL = {http://www.nvidia.com/object/cuda_home_new.html} } % ----------------------------------------------- @misc{xcode, title = {{Xcode Developer Tools}}, URL = {http://developer.apple.com/technologies/tools/xcode.html} } % ----------------------------------------------- @misc{fink, title = {{Fink}}, URL = {http://www.finkproject.org/} } % ----------------------------------------------- @misc{darwinports, title = {{DarwinPorts}}, URL = {http://darwinports.com/} } % ----------------------------------------------- @misc{macports, title = {{MacPorts}}, URL = {http://www.macports.org/} } % ----------------------------------------------- @misc{atistream, title = {{ATI Stream SDK}}, URL = {http://developer.amd.com/gpu/ATIStreamSDK/Pages/default.aspx} } % ----------------------------------------------- @misc{atistreamdocu, title = {{ATI Stream SDK - Documentation}}, URL = {http://developer.amd.com/gpu/ATIStreamSDK/pages/Documentation.aspx} } % ----------------------------------------------- @misc{atidouble, title = {{ATI 
Knowledge Base - Double Support}}, URL = {http://developer.amd.com/support/KnowledgeBase/Lists/KnowledgeBase/DispForm.aspx?ID=88} } % ----------------------------------------------- @misc{eigen, title = {{Eigen Library}}, URL = {http://eigen.tuxfamily.org/} } % ----------------------------------------------- @misc{mtl4, title = {{MTL 4 Library}}, URL = {http://www.mtl4.org/} } % ----------------------------------------------- @book{saad-iterative-solution, author = {Saad, Y. }, howpublished = {Paperback}, isbn = {0898715342}, month = {April}, publisher = {{Society for Industrial and Applied Mathematics}}, title = {Iterative Methods for Sparse Linear Systems, Second Edition}, year = {2003} } % ----------------------------------------------- @misc{pugixml, title = {{pugixml}}, URL = {http://code.google.com/p/pugixml/} } % ----------------------------------------------- @book{trottenberg:multigrid, author = {Trottenberg, U. and Oosterlee, C. and Sch{\"{u}}ller, A.}, title = {{Multigrid}}, publisher = {Academic Press}, year = {2001} } @InBook{yang:parallel-amg, title = {{Numerical Solutions of Partial Differential Equations on Parallel Computers}}, chapter = {{Parallel Algebraic Multigrid Methods - High Performance Preconditioners}}, publisher = {Springer}, year = {2006}, author = {Yang, U.~M.}, editor = {Bruaset, A.~M. and Tveito, A.}, series = {Lecture Notes in Computational Science and Engineering}, pages = {209-236}, } @article{grote:spai, author = {Grote, M.~J. and Huckle, T.}, title = {{Parallel Preconditioning with Sparse Approximate Inverses}}, journal = {SIAM J.~Sci.~Comp.}, volume = {18}, number = {3}, pages = {838--853}, year = {1997}, } @article{huckle:fspai, author = {Huckle, T.}, title = {{Factorized Sparse Approximate Inverses for Preconditioning}}, journal = {J.~Supercomput.}, volume = {25}, pages = {109--117}, year = {2003}, } @inproceedings{cuthill:reducing-bandwidth, author = {Cuthill, E.
and McKee, J.}, title = {Reducing the bandwidth of sparse symmetric matrices}, booktitle = {Proceedings of the 1969 24th National Conference}, series = {ACM '69}, year = {1969}, pages = {157--172}, publisher = {ACM}, } @article{lewis:gps-algorithm, author = {Lewis, J.~G.}, title = {Algorithm 582: The Gibbs-Poole-Stockmeyer and Gibbs-King Algorithms for Reordering Sparse Matrices}, journal = {ACM Trans. Math. Softw.}, volume = {8}, issue = {2}, year = {1982}, pages = {190--194}, publisher = {ACM}, } @book{golub:matrix-computations, author={Golub, G.~H. and Van Loan, C.~F.}, title = {Matrix Computations}, publisher = {John Hopkins University Press}, year = {1996} } @article{simon:lanczos-pro, author = {Simon, Horst~D.}, title = {The Lanczos Algorithm With Partial Reorthogonalization}, journal = {Mathematics of Computation}, volume = {42}, issue = {165}, year = {1984}, pages = {115-142}, publisher = {American Mathematical Society} } @inproceedings{lee:nmf, author = {Lee, D.~D. and Seung, S.~H.}, title = {{Algorithms for Non-negative Matrix Factorization}}, booktitle = {Advances in Neural Information Processing Systems 13}, pages = {556–562}, year = {2000}, } ViennaCL-1.5.1-src/doc/manual/memory-model.tex000644 001750 001750 00000007276 12235435247 021155 0ustar00rupprupp000000 000000 \chapter{Memory Model} With the support of multiple compute backends, memory buffers need to be managed differently depending on whether {\CUDA}, {\OpenCL} or a plain host-based buffer is in use. These different \emph{memory domains} are abstracted in a class \lstinline|viennacl::backend::mem_handle|, which is able to refer to a buffer in all three backends, possibly at the same time. Objects of type \lstinline|mem_handle| are the building blocks of scalars, vectors and matrices in {\ViennaCL}, cf.~Chap.~\ref{chap:basic-types}. 
The raw handles for each memory domain can be obtained via the member functions \lstinline|cuda_handle()|, \lstinline|opencl_handle()| and \lstinline|ram_handle()|. Note that the former two may not be available if no support for the respective backend is activated using the preprocessor constants \lstinline|VIENNACL_WITH_CUDA| and \lstinline|VIENNACL_WITH_OPENCL|, cf.~Sec.~\ref{sec:cuda-opencl-backends}. \section{Memory Handle Operations} Each supported backend is required to support the following functions (arguments omitted for brevity, see reference documentation in \lstinline|doc/doxygen| for details): \begin{itemize} \item \lstinline|memory_create()|: Create a memory buffer \item \lstinline|memory_copy()|: Copy the (partial) contents of one buffer to another \item \lstinline|memory_write()|: Write from a memory location in CPU RAM to the buffer \item \lstinline|memory_read()|: Read from the buffer to a memory location in CPU RAM \end{itemize} A common interface layer in \lstinline|viennacl::backend| dispatches into the respective routines in the backend for the currently active memory domain of the handle. \section{Querying and Switching Active Memory Domains} A \lstinline|mem_handle| object creates its buffer according to the following prioritized list, whichever is available: {\CUDA}, {\OpenCL}, host runtime (CPU RAM). The current memory domain can be queried using the member function \lstinline|memory_domain()|, which returns one of the values \lstinline|MEMORY_NOT_INITIALIZED|, \lstinline|MAIN_MEMORY|, \lstinline|OPENCL_MEMORY|, or \lstinline|CUDA_MEMORY| defined in the struct \lstinline|viennacl::memory_types|. The currently active memory handle can be switched from outside using the member function \lstinline|switch_active_handle_id()|. For example, to indicate that the memory referenced by a handle \lstinline|h| resides in CPU RAM, the line \begin{lstlisting} h.switch_active_handle_id(viennacl::MAIN_MEMORY); \end{lstlisting} is sufficient. 
However, no memory is created, copied, or manipulated when switching the currently active handle, because a \lstinline|mem_handle| object does not know what the buffer content is referring to and is thus not able to convert data between different memory domains if required. In order to copy the contents of a memory buffer in one memory domain to a memory buffer in another memory domain within the same \lstinline|mem_handle|-object, the data type must be supplied. This is accomplished using the function \lstinline|viennacl::backend::switch_memory_domain(mem_handle, viennacl::memory_types)|, which takes the data type as template argument. Thus, in order to make current data of type \lstinline|float| available in CPU RAM for a handle \lstinline|h|, the function call \begin{lstlisting} viennacl::backend::switch_memory_domain<float>(h, viennacl::MAIN_MEMORY); \end{lstlisting} is sufficient. If data should be transferred from one memory handle \lstinline|h1| to another memory handle \lstinline|h2|, the function \lstinline|viennacl::backend::typesafe_memory_copy(h1, h2)| is provided. It takes the data type as template argument and ensures a data conversion between different memory domains if required (e.g. \lstinline|cl_uint| to \lstinline|unsigned int|). ViennaCL-1.5.1-src/doc/manual/installation.tex000644 001750 001750 00000037231 12255634117 021241 0ustar00rupprupp000000 000000 \chapter{Installation} This chapter shows how {\ViennaCL} can be integrated into a project and how the examples are built. The necessary steps are outlined for several different platforms, but we could not check every possible combination of hardware, operating system and compiler. 
If you experience any trouble, please write to the mailing list at \\ \begin{center} \texttt{viennacl-support$@$lists.sourceforge.net} \end{center} % ----------------------------------------------------------------------------- % ----------------------------------------------------------------------------- \section{Dependencies} % ----------------------------------------------------------------------------- % ----------------------------------------------------------------------------- \label{dependencies} {\ViennaCL} uses the {\CMake} build system for multi-platform support. Thus, before you proceed with the installation of {\ViennaCL}, make sure you have a recent version of {\CMake} installed. To use {\ViennaCL}, only the following minimal prerequisite has to be fulfilled: \begin{itemize} \item A fairly recent C++ compiler (e.g.~{\GCC} version 4.2.x or above and Visual C++ 2005 and 2010 are known to work) \end{itemize} The full potential of {\ViennaCL} is available with the following optional libraries: \begin{itemize} \item {\CMake}~\cite{cmake} as build system (optional, but highly recommended for building examples) \item {\OpenCL}~\cite{khronoscl,nvidiacl} for accessing compute devices (GPUs); see Section~\ref{opencllibs} for details. \item {\CUDA}~\cite{nvidiacuda} for using CUDA-accelerated operations. \item {\OpenMP}~\cite{openmp} for directive-based parallelism on CPUs. \item {\ublas} (shipped with {\Boost}~\cite{boost}) provides the same interface as {\ViennaCL} and allows to switch between CPU and GPU seamlessly, see the tutorials. \item Eigen \cite{eigen} can be used to fill {\ViennaCL} types directly. Moreover, the iterative solvers in {\ViennaCL} can directly be used with Eigen objects. \item MTL 4 \cite{mtl4} can be used to fill {\ViennaCL} types directly. Even though MTL 4 provides its own iterative solvers, the {\ViennaCL} solvers can also be used with MTL 4 objects. 
\end{itemize} %The use of {\OpenMP} for the benchmark suite allows fair comparisons between your multi-core CPU and your compute device (e.g.~GPU). \section{Generic Installation of ViennaCL} \label{sec:viennacl-installation} Since {\ViennaCL} is a header-only library, it is sufficient to copy the folder \lstinline|viennacl/| either into your project folder or to your global system include path. On Unix based systems, this is often \lstinline|/usr/include/| or \lstinline|/usr/local/include/|. If the OpenCL headers are not installed on your system, you should repeat the above procedure with the folder \lstinline|CL/|. On Windows, the situation strongly depends on your development environment. We advise users to consult the documentation of their compiler on how to set the include path correctly. With Visual Studio this is usually something like \texttt{C:$\setminus$Program Files$\setminus$Microsoft Visual Studio 9.0$\setminus$VC$\setminus$include} and can be set in \texttt{Tools -> Options -> Projects and Solutions -> VC++-\-Directories}. For using the {\CUDA} backend, simply make sure that the {\CUDA} SDK is installed properly. If you wish to use the {\OpenCL} backend, the include and library directories of your {\OpenCL} SDK should also be added there. \NOTE{If multiple {\OpenCL} libraries are available on the host system, {\ViennaCL} uses the first platform returned by the system. 
Consult Chap.~\ref{chap:multi-devices} for configuring the use of other platforms.} % ----------------------------------------------------------------------------- % ----------------------------------------------------------------------------- \section{Get the {\OpenCL} Library} \label{opencllibs} % ----------------------------------------------------------------------------- % ----------------------------------------------------------------------------- In order to compile and run {\OpenCL} applications, a corresponding library (e.g.~\texttt{libOpenCL.so} under Unix based systems) is required. If {\OpenCL} is to be used with GPUs, suitable drivers have to be installed. This section describes how these can be acquired. \TIP{Note that for Mac OS X systems there is no need to install an {\OpenCL} capable driver and the corresponding library. The {\OpenCL} library is already present if a suitable graphics card is present. The setup of {\ViennaCL} on Mac OS X is discussed in Section~\ref{apple}.} \subsection{\NVIDIA Driver} \NVIDIA provides the {\OpenCL} library with the GPU driver. Therefore, if an \NVIDIA driver is present on the system, the library is too. However, not all of the released drivers contain the {\OpenCL} library. A driver which is known to support {\OpenCL}, and hence provides the required library, is $260.19.21$. Note that the latest {\NVIDIA} drivers do not include the {\OpenCL} headers anymore. Therefore, the official {\OpenCL} headers from the Khronos group \cite{khronoscl} are also shipped with {\ViennaCL} in the folder \lstinline|CL/|. \subsection{AMD Accelerated Parallel Processing SDK (formerly Stream SDK)} \label{sec:opencl-on-ati} AMD has provided the {\OpenCL} library with the Accelerated Parallel Processing (APP) SDK~\cite{atistream} previously, now the {\OpenCL} library is also included in the GPU driver. At the release of {\ViennaCLversion}, the latest version of the SDK is $2.7$. 
If used with AMD GPUs, recent AMD GPU drivers are typically required. If {\ViennaCL} is to be run on multi-core CPUs, no additional GPU driver is required. The installation notes of the APP SDK provide guidance throughout the installation process~\cite{atistreamdocu}. \TIP{If the SDK is installed in a non-system wide location on UNIX-based systems, be sure to add the {\OpenCL} library path to the \texttt{LD\_LIBRARY\_PATH} environment variable. Otherwise, linker errors will occur as the required library cannot be found.} It is important to note that the AMD APP SDK may not provide {\OpenCL} certified double precision support~\cite{atidouble} on some CPUs and GPUs. \NOTE{Unfortunately, some versions of the AMD APP SDK are known to have bugs. For example, APP SDK 2.7 on Linux causes BiCGStab to fail on some devices.} \subsection{INTEL OpenCL SDK} \label{sec:opencl-on-intel} {\ViennaCL} works fine with the INTEL OpenCL SDK on Windows and Linux. The correct linker path is set automatically in \lstinline|CMakeLists.txt| when using the {\CMake} build system, cf.~Sec.~\ref{sec:viennacl-installation}. % ----------------------------------------------------------------------------- % ----------------------------------------------------------------------------- \section{Enabling OpenMP, OpenCL, or CUDA Backends} \label{sec:cuda-opencl-backends} % ----------------------------------------------------------------------------- % ----------------------------------------------------------------------------- \TIP{The new default behavior in {\ViennaCL} 1.4.0 is to use the CPU backend. {\OpenCL} and {\CUDA} backends need to be enabled by appropriate preprocessor \lstinline|#define|s.} By default, {\ViennaCL} now uses the single-threaded/OpenMP-enabled CPU backend. 
The {\OpenCL} and the {\CUDA}-backend need to be enabled explicitly by using preprocessor constants as follows: \begin{center} \begin{tabular}{|l|l|} \hline \textbf{Preprocessor} \lstinline|#define| & \textbf{Default computing backend} \\ \hline none & CPU, single-threaded \\ \hline \lstinline|VIENNACL_WITH_OPENMP| & CPU with OpenMP (compiler flags required) \\ \hline \lstinline|VIENNACL_WITH_OPENCL| & OpenCL \\ \hline \lstinline|VIENNACL_WITH_CUDA| & CUDA \\ \hline \end{tabular} \end{center} The preprocessor constants can be either defined at the beginning of the source file (prior to any ViennaCL-includes), or passed to the compiler as command line argument. For example, on \lstinline|g++| the respective command line option for enabling the OpenCL backend is \lstinline|-DVIENNACL_WITH_OPENCL|. Note that CUDA requires the \lstinline|nvcc| compiler. Furthermore, the use of {\OpenMP} usually requires additional compiler flags (on \lstinline|g++| this is for example \lstinline|-fopenmp|). \TIP{The CUDA backend requires a compilation using \lstinline|nvcc|.} Multiple backends can be used simultaneously. In such case, \lstinline|CUDA| has higher priority than \lstinline|OpenCL|, which has higher priority over the CPU backend when it comes to selecting the default backend. % ----------------------------------------------------------------------------- % ----------------------------------------------------------------------------- \section{Building the Examples and Tutorials} % ----------------------------------------------------------------------------- % ----------------------------------------------------------------------------- For building the examples, we suppose that {\CMake} is properly set up on your system. The other dependencies are listed in Tab.~\ref{tab:tutorial-dependencies}. 
\begin{table}[tb] \begin{center} \begin{tabular}{l|l} Example/Tutorial & Dependencies\\ \hline \texttt{tutorial/amg.cpp} & {\OpenCL}, {\ublas} \\ \texttt{tutorial/bandwidth-reduction.cpp} & - \\ \texttt{tutorial/blas1.cpp/cu} & - \\ \texttt{tutorial/blas2.cpp/cu} & {\ublas} \\ \texttt{tutorial/blas3.cpp/cu} & {\ublas} \\ \texttt{tutorial/custom-kernels.cpp} & {\OpenCL} \\ \texttt{tutorial/custom-context.cpp} & {\OpenCL} \\ \texttt{tutorial/eigen-with-viennacl.cpp} & {\Eigen} \\ \texttt{tutorial/fft.cpp} & {\OpenCL} \\ \texttt{tutorial/iterative.cpp/cu} & {\ublas} \\ \texttt{tutorial/iterative-ublas.cpp} & {\ublas} \\ \texttt{tutorial/iterative-eigen.cpp} & {\Eigen} \\ \texttt{tutorial/iterative-mtl4.cpp} & {\MTL} \\ \texttt{tutorial/lanczos.cpp/cu} & {\ublas} \\ \texttt{tutorial/libviennacl.cpp/cu} & - \\ \texttt{tutorial/least-squares.cpp/cu} & {\ublas} \\ \texttt{tutorial/matrix-range.cpp/cu} & {\ublas} \\ \texttt{tutorial/mtl4-with-viennacl.cpp} & {\MTL} \\ \texttt{tutorial/multithreaded.cpp/cu} & {\Boost} \\ \texttt{tutorial/multithreaded\_cg.cpp/cu} & {\Boost} \\ \texttt{tutorial/power-iter.cpp/cu} & {\ublas} \\ \texttt{tutorial/qr.cpp/cu} & {\ublas} \\ \texttt{tutorial/scheduler.cpp} & - \\ \texttt{tutorial/spai.cpp} & {\OpenCL}, {\ublas} \\ \texttt{tutorial/sparse.cpp/cu} & {\ublas} \\ \texttt{tutorial/structured-matrices.cpp} & {\OpenCL}, {\ublas} \\ \texttt{tutorial/vector-range.cpp/cu} & {\ublas} \\ \texttt{tutorial/viennacl-info.cpp} & {\OpenCL} \\ \texttt{tutorial/wrap-cuda-buffer.cu} & {\CUDA} \\ \texttt{tutorial/wrap-host-buffer.cpp} & - \\ \texttt{benchmarks/blas3.cpp/cu} & - \\ \texttt{benchmarks/opencl.cpp} & {\OpenCL} \\ \texttt{benchmarks/solver.cpp/cu} & {\ublas} \\ \texttt{benchmarks/sparse.cpp/cu} & {\ublas} \\ \texttt{benchmarks/vector.cpp/cu} & - \\ \end{tabular} \caption{Dependencies for the examples in the \texttt{examples/} folder. Examples using the CUDA-backend use the \lstinline|.cu| file extension. 
Note that all examples can be run using either of the CPU, OpenCL, and CUDA backend unless an explicit {\OpenCL}-dependency is stated.} \label{tab:tutorial-dependencies} \end{center} \end{table} Before building the examples, customize \texttt{CMakeLists.txt} in the {\ViennaCL} root folder for your needs. Per default, all examples using {\ublas}, {Eigen} and {MTL4} are turned off. Please enable the respective examples based on the libraries available on your machine. Directions on how to accomplish this are given directly within the \texttt{CMakeLists.txt} file. A brief overview of the most important flags is as follows: \begin{center} \begin{tabular}{|l|l|} \hline {\CMake} Flag & Purpose \\ \hline \lstinline|ENABLE_CUDA| & Builds examples with the {\CUDA} backend enabled\\ \lstinline|ENABLE_OPENCL| & Builds examples with the {\OpenCL} backend enabled\\ \lstinline|ENABLE_OPENMP| & Builds examples with {\OpenMP} for the CPU backend enabled\\ \hline \lstinline|ENABLE_EIGEN| & Builds examples depending on {\Eigen}\\ \lstinline|ENABLE_MTL4| & Builds examples depending on {\MTL}\\ \lstinline|ENABLE_UBLAS| & Builds examples depending on {\ublas}\\ \hline \end{tabular} \end{center} \subsection{Linux} To build the examples, open a terminal and change to: \begin{lstlisting} $> cd /your-ViennaCL-path/build/ \end{lstlisting} Execute \begin{lstlisting} $> cmake .. \end{lstlisting} to obtain a Makefile and type \begin{lstlisting} $> make \end{lstlisting} to build the examples. If some of the dependencies in Tab.~\ref{tab:tutorial-dependencies} are not fulfilled, you can build each example separately: \begin{lstlisting} $> make blas1 #builds the blas level 1 tutorial $> make vectorbench #builds vector benchmarks \end{lstlisting} \TIP{Speed up the building process by using jobs, e.g. 
\keyword{make -j4}.} Execute the examples from the \lstinline|build/| folder as follows: \begin{lstlisting} $> examples/tutorial/blas1 $> examples/benchmarks/vectorbench \end{lstlisting} Note that all benchmark executables carry the suffix \lstinline|bench|. \TIP{Use the {\CMake}-GUI via \lstinline|cmake-gui ..| within the \lstinline|build/| folder in order to enable or disable optional libraries conveniently.} \subsection{Mac OS X} \label{apple} The tools mentioned in Section \ref{dependencies} are available on Macintosh platforms too. For the {\GCC} compiler the Xcode~\cite{xcode} package has to be installed. To install {\CMake} and {\Boost} external portation tools have to be used, for example, Fink~\cite{fink}, DarwinPorts~\cite{darwinports} or MacPorts~\cite{macports}. Such portation tools provide the aforementioned packages, {\CMake} and {\Boost}, for Macintosh platforms. \TIP{If the {\CMake} build system has problems detecting your {\Boost} libraries, determine the location of your {\Boost} folder. Open the \texttt{CMakeLists.txt} file in the root directory of {\ViennaCL} and add your {\Boost} path after the following entry: \texttt{IF(\${CMAKE\_SYSTEM\_NAME} MATCHES "Darwin")} } The build process of {\ViennaCL} on Mac OS is similar to Linux. \subsection{Windows} In the following the procedure is outlined for \texttt{Visual Studio}: Assuming that an {\OpenCL} SDK and {\CMake} are already installed, Visual Studio solution and project files can be created using {\CMake}: \begin{itemize} \item Open the {\CMake} GUI. \item Set the {\ViennaCL} base directory as source directory. \item Set the \texttt{build/} directory as build directory. \item Click on 'Configure' and select the appropriate generator (e.g.~\texttt{Visual Studio 9 2008}). \item If you set \lstinline|ENABLE_CUDA|, \lstinline|ENABLE_EIGEN|, \lstinline|ENABLE_MTL4|, or \lstinline|ENABLE_OPENCL| and the paths cannot be found, please select the advanced view and provide the required paths manually. 
\item If you set \lstinline|ENABLE_UBLAS| and the paths cannot be found, please select the advanced view and provide the required paths manually. You may have to specify the linker path for Boost manually within your Visual Studio IDE. \item Click again on 'Configure'. You should not receive an error at this point. \item Click on 'Generate'. \item The project files can now be found in the {\ViennaCL} build directory, where they can be opened and compiled with Visual Studio (provided that the include and library paths are set correctly, see Sec.~\ref{sec:viennacl-installation}). \end{itemize} \TIP{The examples and tutorials should be executed from within the \texttt{build/} directory of {\ViennaCL}, otherwise the sample data files cannot be found.} ViennaCL-1.5.1-src/doc/manual/structured-matrices.tex000644 001750 001750 00000007752 12235435247 022557 0ustar00rupprupp000000 000000 \chapter{Structured Matrix Types} \NOTE{Structured matrix types are experimental in {\ViennaCLversion}. Interface changes as well as considerable performance improvements may be included in future releases!} There are a number of structured dense matrices for which some algorithms such as matrix-vector products can be computed with much lower computational effort than for the general dense matrix case. In the following, four structured dense matrix types included in {\ViennaCL} are discussed. Example code can be found in \lstinline|examples/tutorial/structured-matrices.cpp|. 
\section{Circulant Matrix} A circulant matrix is a matrix of the form \begin{align*} \left( \begin{array}{ccccc} c_0 & c_{n-1} & \ldots & c_2 & c_1 \\ c_1 & c_0 & c_{n-1} & & c_2 \\ \vdots & c_1 & c_0 & \ddots & \vdots \\ c_{n-2} & & \ddots & \ddots & c_{n-1} \\ c_{n-1} & c_{n-2} & \hdots & c_1 & c_0 \\ \end{array} \right) \end{align*} and available in {\ViennaCL} via \begin{lstlisting} #include "viennacl/circulant_matrix.hpp" std::size_t s = 42; viennacl::circulant_matrix<float> circ_mat(s, s); \end{lstlisting} The \lstinline|circulant_matrix| type can be manipulated in the same way as the dense matrix type \lstinline|matrix|. Note that writing to a single element of the matrix is structure-preserving, e.g.~changing \lstinline|circ_mat(1,2)| will automatically update \lstinline|circ_mat(0,1)|, \lstinline|circ_mat(2,3)| and so on. \section{Hankel Matrix} A Hankel matrix is a matrix of the form \begin{align*} \left( \begin{array}{cccc} a & b & c & d \\ b & c & d & e \\ c & d & e & f \\ d & e & f & g \\ \end{array} \right) \end{align*} and available in {\ViennaCL} via \begin{lstlisting} #include "viennacl/hankel_matrix.hpp" std::size_t s = 42; viennacl::hankel_matrix<float> hank_mat(s, s); \end{lstlisting} The \lstinline|hankel_matrix| type can be manipulated in the same way as the dense matrix type \lstinline|matrix|. Note that writing to a single element of the matrix is structure-preserving, e.g.~changing \lstinline|hank_mat(1,2)| in the example above will also update \lstinline|hank_mat(0,3)|, \lstinline|hank_mat(2,1)| and \lstinline|hank_mat(3,0)|. 
\section{Toeplitz Matrix} A Toeplitz matrix is a matrix of the form \begin{align*} \left( \begin{array}{cccc} a & b & c & d \\ e & a & b & c \\ f & e & a & b \\ g & f & e & a \\ \end{array} \right) \end{align*} and available in {\ViennaCL} via \begin{lstlisting} #include "viennacl/toeplitz_matrix.hpp" std::size_t s = 42; viennacl::toeplitz_matrix<float> toep_mat(s, s); \end{lstlisting} The \lstinline|toeplitz_matrix| type can be manipulated in the same way as the dense matrix type \lstinline|matrix|. Note that writing to a single element of the matrix is structure-preserving, e.g.~changing \lstinline|toep_mat(1,2)| in the example above will also update \lstinline|toep_mat(0,1)| and \lstinline|toep_mat(2,3)|. \section{Vandermonde Matrix} A Vandermonde matrix is a matrix of the form \begin{align*} \left( \begin{array}{ccccc} 1 & \alpha_1 & \alpha_1^2 & \ldots & \alpha_1^{n-1} \\ 1 & \alpha_2 & \alpha_2^2 & \ldots & \alpha_2^{n-1} \\ 1 & \vdots & \vdots & \vdots \\ 1 & \alpha_m & \alpha_m^2 & \ldots & \alpha_m^{n-1} \\ \end{array} \right) \end{align*} and available in {\ViennaCL} via \begin{lstlisting} #include "viennacl/vandermonde_matrix.hpp" std::size_t s = 42; viennacl::vandermonde_matrix<float> vand_mat(s, s); \end{lstlisting} The \lstinline|vandermonde_matrix| type can be manipulated in the same way as the dense matrix type \lstinline|matrix|, but restrictions apply. For example, the addition or subtraction of two Vandermonde matrices does not yield another Vandermonde matrix. Note that writing to a single element of the matrix is structure-preserving, e.g.~changing \lstinline|vand_mat(1,2)| in the example above will automatically update \lstinline|vand_mat(1,3)|, \lstinline|vand_mat(1,4)|, etc. 
ViennaCL-1.5.1-src/doc/manual/IEEEtran_v1.13.bst000644 001750 001750 00000164162 12235435247 020761 0ustar00rupprupp000000 000000 %% %% IEEEtran.bst %% BibTeX Bibliography Style file for IEEE Journals and Conferences (unsorted) %% Version 1.13 (2008/09/30) %% %% Copyright (c) 2003-2008 Michael Shell %% %% Original starting code base and algorithms obtained from the output of %% Patrick W. Daly's makebst package as well as from prior versions of %% IEEE BibTeX styles: %% %% 1. Howard Trickey and Oren Patashnik's ieeetr.bst (1985/1988) %% 2. Silvano Balemi and Richard H. Roy's IEEEbib.bst (1993) %% %% Support sites: %% http://www.michaelshell.org/tex/ieeetran/ %% http://www.ctan.org/tex-archive/macros/latex/contrib/IEEEtran/ %% and/or %% http://www.ieee.org/ %% %% For use with BibTeX version 0.99a or later %% %% This is a numerical citation style. %% %%************************************************************************* %% Legal Notice: %% This code is offered as-is without any warranty either expressed or %% implied; without even the implied warranty of MERCHANTABILITY or %% FITNESS FOR A PARTICULAR PURPOSE! %% User assumes all risk. %% In no event shall IEEE or any contributor to this code be liable for %% any damages or losses, including, but not limited to, incidental, %% consequential, or any other damages, resulting from the use or misuse %% of any information contained here. %% %% All comments are the opinions of their respective authors and are not %% necessarily endorsed by the IEEE. %% %% This work is distributed under the LaTeX Project Public License (LPPL) %% ( http://www.latex-project.org/ ) version 1.3, and may be freely used, %% distributed and modified. A copy of the LPPL, version 1.3, is included %% in the base LaTeX documentation of all distributions of LaTeX released %% 2003/12/01 or later. %% Retain all contribution notices and credits. 
%% ** Modified files should be clearly indicated as such, including ** %% ** renaming them and changing author support contact information. ** %% %% File list of work: IEEEabrv.bib, IEEEfull.bib, IEEEexample.bib, %% IEEEtran.bst, IEEEtranS.bst, IEEEtranSA.bst, %% IEEEtranN.bst, IEEEtranSN.bst, IEEEtran_bst_HOWTO.pdf %%************************************************************************* % % % Changelog: % % 1.00 (2002/08/13) Initial release % % 1.10 (2002/09/27) % 1. Corrected minor bug for improperly formed warning message when a % book was not given a title. Thanks to Ming Kin Lai for reporting this. % 2. Added support for CTLname_format_string and CTLname_latex_cmd fields % in the BST control entry type. % % 1.11 (2003/04/02) % 1. Fixed bug with URLs containing underscores when using url.sty. Thanks % to Ming Kin Lai for reporting this. % % 1.12 (2007/01/11) % 1. Fixed bug with unwanted comma before "et al." when an entry contained % more than two author names. Thanks to Pallav Gupta for reporting this. % 2. Fixed bug with anomalous closing quote in tech reports that have a % type, but without a number or address. Thanks to Mehrdad Mirreza for % reporting this. % 3. Use braces in \providecommand in begin.bib to better support % latex2html. TeX style length assignments OK with recent versions % of latex2html - 1.71 (2002/2/1) or later is strongly recommended. % Use of the language field still causes trouble with latex2html. % Thanks to Federico Beffa for reporting this. % 4. Added IEEEtran.bst ID and version comment string to .bbl output. % 5. Provide a \BIBdecl hook that allows the user to execute commands % just prior to the first entry. % 6. Use default urlstyle (is using url.sty) of "same" rather than rm to % better work with a wider variety of bibliography styles. % 7. Changed month abbreviations from Sept., July and June to Sep., Jul., % and Jun., respectively, as IEEE now does. Thanks to Moritz Borgmann % for reporting this. % 8. 
Control entry types should not be considered when calculating longest
%    label width.
% 9. Added alias www for electronic/online.
% 10. Added CTLname_url_prefix control entry type.
%
% 1.13 (2008/09/30)
% 1. Fixed bug with edition number to ordinal conversion. Thanks to
%    Michael Roland for reporting this and correcting the algorithm.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% DEFAULTS FOR THE CONTROLS OF THE BST STYLE %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% These are the defaults for the user adjustable controls. The values used
% here can be overridden by the user via IEEEtranBSTCTL entry type.
% Each default is a zero-argument function that pushes one constant
% (an integer such as #1, or a string) onto the stack.

% NOTE: The recommended LaTeX command to invoke a control entry type is:
%
%\makeatletter
%\def\bstctlcite{\@ifnextchar[{\@bstctlcite}{\@bstctlcite[@auxout]}}
%\def\@bstctlcite[#1]#2{\@bsphack
% \@for\@citeb:=#2\do{%
% \edef\@citeb{\expandafter\@firstofone\@citeb}%
% \if@filesw\immediate\write\csname #1\endcsname{\string\citation{\@citeb}}\fi}%
% \@esphack}
%\makeatother
%
% It is called at the start of the document, before the first \cite, like:
% \bstctlcite{IEEEexample:BSTcontrol}
%
% IEEEtran.cls V1.6 and later does provide this command.


% #0 turns off the display of the number for articles.
% #1 enables
% (default here: #1, enabled)
FUNCTION {default.is.use.number.for.article} { #1 }


% #0 turns off the display of the paper and type fields in @inproceedings.
% #1 enables
% (default here: #1, enabled)
FUNCTION {default.is.use.paper} { #1 }


% #0 turns off the forced use of "et al."
% #1 enables
% (default here: #0, disabled)
FUNCTION {default.is.forced.et.al} { #0 }

% The maximum number of names that can be present beyond which an "et al."
% usage is forced. Be sure that num.names.shown.with.forced.et.al (below)
% is not greater than this value!
% Note: There are many instances of references in IEEE journals which have
% a very large number of authors as well as instances in which "et al." is
% used profusely.
% (default here: #10)
FUNCTION {default.max.num.names.before.forced.et.al} { #10 }

% The number of names that will be shown with a forced "et al.".
% Must be less than or equal to max.num.names.before.forced.et.al
% (default here: #1)
FUNCTION {default.num.names.shown.with.forced.et.al} { #1 }


% #0 turns off the alternate interword spacing for entries with URLs.
% #1 enables
% (default here: #1, enabled)
FUNCTION {default.is.use.alt.interword.spacing} { #1 }

% If alternate interword spacing for entries with URLs is enabled, this is
% the interword spacing stretch factor that will be used. For example, the
% default "4" here means that the interword spacing in entries with URLs can
% stretch to four times normal. Does not have to be an integer. Note that
% the value specified here can be overridden by the user in their LaTeX
% code via a command such as:
% "\providecommand\BIBentryALTinterwordstretchfactor{1.5}" in addition to
% that via the IEEEtranBSTCTL entry type.
% The value is a string, not an integer, which is why non-integers are allowed.
FUNCTION {default.ALTinterwordstretchfactor} { "4" }


% #0 turns off the "dashification" of repeated (i.e., identical to those
% of the previous entry) names. IEEE normally does this.
% #1 enables
% (default here: #1, enabled)
FUNCTION {default.is.dash.repeated.names} { #1 }


% The default name format control string.
FUNCTION {default.name.format.string}{ "{f.~}{vv~}{ll}{, jj}" }


% The default LaTeX font command for the names.
% (default here: the empty string)
FUNCTION {default.name.latex.cmd}{ "" }


% The default URL prefix.
FUNCTION {default.name.url.prefix}{ "[Online]. Available:" }


% Other controls that cannot be accessed via IEEEtranBSTCTL entry type.

% #0 turns off the terminal startup banner/completed message so as to
% operate more quietly.
% #1 enables
% (set here to #1; read by banner.message and completed.message below)
FUNCTION {is.print.banners.to.terminal} { #1 }




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% FILE VERSION AND BANNER %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Version, date and website strings; banner.message concatenates them.
FUNCTION{bst.file.version} { "1.13" }
FUNCTION{bst.file.date} { "2008/09/30" }
FUNCTION{bst.file.website} { "http://www.michaelshell.org/tex/ieeetran/bibtex/" }

% Startup banner. When is.print.banners.to.terminal is nonzero, builds three
% strings with "*" (concatenation) and emits each one with top$; otherwise
% does nothing (skip$).
FUNCTION {banner.message}
{ is.print.banners.to.terminal
     { "-- IEEEtran.bst version" " " * bst.file.version *
       " (" * bst.file.date * ") " * "by Michael Shell." *
       top$
       "-- " bst.file.website *
       top$
       "-- See the " quote$ * "IEEEtran_bst_HOWTO.pdf" * quote$ * " manual for usage information." *
       top$
     }
     { skip$ }
   if$
}

% Completion message. When is.print.banners.to.terminal is nonzero, emits an
% empty string and then "Done." via top$; otherwise does nothing.
FUNCTION {completed.message}
{ is.print.banners.to.terminal
     { ""
       top$
       "Done."
       top$
     }
     { skip$ }
   if$
}




%%%%%%%%%%%%%%%%%%%%%%
%% STRING CONSTANTS %%
%%%%%%%%%%%%%%%%%%%%%%

% Each bbl.* function pushes one fixed word or abbreviation as a string.
FUNCTION {bbl.and}{ "and" }
FUNCTION {bbl.etal}{ "et~al." }
FUNCTION {bbl.editors}{ "eds." }
FUNCTION {bbl.editor}{ "ed." }
FUNCTION {bbl.edition}{ "ed." }
FUNCTION {bbl.volume}{ "vol." }
FUNCTION {bbl.of}{ "of" }
FUNCTION {bbl.number}{ "no." }
FUNCTION {bbl.in}{ "in" }
FUNCTION {bbl.pages}{ "pp." }
FUNCTION {bbl.page}{ "p." }
FUNCTION {bbl.chapter}{ "ch." }
FUNCTION {bbl.paper}{ "paper" }
FUNCTION {bbl.part}{ "pt." }
FUNCTION {bbl.patent}{ "Patent" }
FUNCTION {bbl.patentUS}{ "U.S." }
FUNCTION {bbl.revision}{ "Rev." }
FUNCTION {bbl.series}{ "ser." }
FUNCTION {bbl.standard}{ "Std." }
FUNCTION {bbl.techrep}{ "Tech. Rep." }
FUNCTION {bbl.mthesis}{ "Master's thesis" }
FUNCTION {bbl.phdthesis}{ "Ph.D. dissertation" }

% Ordinal suffixes.
FUNCTION {bbl.st}{ "st" }
FUNCTION {bbl.nd}{ "nd" }
FUNCTION {bbl.rd}{ "rd" }
FUNCTION {bbl.th}{ "th" }


% This is the LaTeX spacer that is used when a larger than normal space
% is called for (such as just before the address:publisher).
FUNCTION {large.space} { "\hskip 1em plus 0.5em minus 0.4em\relax " }

% The LaTeX code for dashes that are used to represent repeated names.
% Note: Some older IEEE journals used something like
% "\rule{0.275in}{0.5pt}\," which is fairly thick and runs right along
% the baseline. However, IEEE now uses a thinner, above baseline,
% six dash long sequence.
FUNCTION {repeated.name.dashes} { "------" }



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% PREDEFINED STRING MACROS %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Three-letter month macros that expand to abbreviated month names.
MACRO {jan} {"Jan."}
MACRO {feb} {"Feb."}
MACRO {mar} {"Mar."}
MACRO {apr} {"Apr."}
MACRO {may} {"May"}
MACRO {jun} {"Jun."}
MACRO {jul} {"Jul."}
MACRO {aug} {"Aug."}
MACRO {sep} {"Sep."}
MACRO {oct} {"Oct."}
MACRO {nov} {"Nov."}
MACRO {dec} {"Dec."}



%%%%%%%%%%%%%%%%%%
%% ENTRY FIELDS %%
%%%%%%%%%%%%%%%%%%

% Declares the fields every entry may carry, no integer entry variables,
% and one string entry variable (label, assigned by longest.label.pass).
% NOTE(review): the CTL* fields presumably belong to the IEEEtranBSTCTL
% control entry type; its handler is outside this part of the file.
ENTRY
  { address
    assignee
    author
    booktitle
    chapter
    day
    dayfiled
    edition
    editor
    howpublished
    institution
    intype
    journal
    key
    language
    month
    monthfiled
    nationality
    note
    number
    organization
    pages
    paper
    publisher
    school
    series
    revision
    title
    type
    url
    volume
    year
    yearfiled
    CTLuse_article_number
    CTLuse_paper
    CTLuse_forced_etal
    CTLmax_names_forced_etal
    CTLnames_show_etal
    CTLuse_alt_spacing
    CTLalt_stretch_factor
    CTLdash_repeated_names
    CTLname_format_string
    CTLname_latex_cmd
    CTLname_url_prefix
  }
  {}
  { label }




%%%%%%%%%%%%%%%%%%%%%%%
%% INTEGER VARIABLES %%
%%%%%%%%%%%%%%%%%%%%%%%

% Output status attributes (previous/this/standard) and the constants that
% name their possible values, plus the scalar capitalization status.
INTEGERS { prev.status.punct this.status.punct punct.std
           punct.no punct.comma punct.period
           prev.status.space this.status.space space.std
           space.no space.normal space.large
           prev.status.quote this.status.quote quote.std
           quote.no quote.close
           prev.status.nline this.status.nline nline.std
           nline.no nline.newblock
           status.cap cap.std
           cap.no cap.yes}

% Label bookkeeping and general purpose counters. nameptr, namesleft,
% numnames and multiresult are reused as scratch variables by several
% unrelated helpers (e.g. string.to.integer and large.number.separate).
INTEGERS { longest.label.width multiresult nameptr namesleft number.label numnames }

% User controls; loaded with their defaults by initialize.controls.
INTEGERS { is.use.number.for.article
           is.use.paper
           is.forced.et.al
           max.num.names.before.forced.et.al
           num.names.shown.with.forced.et.al
           is.use.alt.interword.spacing
           is.dash.repeated.names}


%%%%%%%%%%%%%%%%%%%%%%
%% STRING VARIABLES %%
%%%%%%%%%%%%%%%%%%%%%%

% s and t are general purpose scratch strings; oldname remembers the names
% of the previous entry for name.or.dash; the last four hold user controls.
STRINGS { bibinfo
          longest.label
          oldname
          s
          t
          ALTinterwordstretchfactor
          name.format.string
          name.latex.cmd
          name.url.prefix}




%%%%%%%%%%%%%%%%%%%%%%%%%
%% LOW LEVEL FUNCTIONS %%
%%%%%%%%%%%%%%%%%%%%%%%%%

% Copies every default.* setting into its corresponding control variable.
FUNCTION {initialize.controls}
{ default.is.use.number.for.article 'is.use.number.for.article :=
  default.is.use.paper 'is.use.paper :=
  default.is.forced.et.al 'is.forced.et.al :=
  default.max.num.names.before.forced.et.al 'max.num.names.before.forced.et.al :=
  default.num.names.shown.with.forced.et.al 'num.names.shown.with.forced.et.al :=
  default.is.use.alt.interword.spacing 'is.use.alt.interword.spacing :=
  default.is.dash.repeated.names 'is.dash.repeated.names :=
  default.ALTinterwordstretchfactor 'ALTinterwordstretchfactor :=
  default.name.format.string 'name.format.string :=
  default.name.latex.cmd 'name.latex.cmd :=
  default.name.url.prefix 'name.url.prefix :=
}


% This IEEEtran.bst features a very powerful and flexible mechanism for
% controlling the capitalization, punctuation, spacing, quotation, and
% newlines of the formatted entry fields. (Note: IEEEtran.bst does not need
% or use the newline/newblock feature, but it has been implemented for
% possible future use.) The output states of IEEEtran.bst consist of
% multiple independent attributes and, as such, can be thought of as being
% vectors, rather than the simple scalar values ("before.all",
% "mid.sentence", etc.) used in most other .bst files.
%
% The more flexible and complex design used here was motivated in part by
% IEEE's rather unusual bibliography style. For example, IEEE ends the
% previous field item with a period and large space prior to the publisher
% address; the @electronic entry types use periods as inter-item punctuation
% rather than the commas used by the other entry types; and URLs are never
% followed by periods even though they are the last item in the entry.
% Although it is possible to accommodate these features with the conventional
% output state system, the seemingly endless exceptions make for convoluted,
% unreliable and difficult to maintain code.
%
% IEEEtran.bst's output state system can be easily understood via a simple
% illustration of the two most recently formatted entry fields (on the stack):
%
%               CURRENT_ITEM
%               "PREVIOUS_ITEM
%
% which, in this example, is to eventually appear in the bibliography as:
%
% "PREVIOUS_ITEM," CURRENT_ITEM
%
% It is the job of the output routine to take the previous item off of the
% stack (while leaving the current item at the top of the stack), apply its
% trailing punctuation (including closing quote marks) and spacing, and then
% to write the result to BibTeX's output buffer:
%
% "PREVIOUS_ITEM,"
%
% Punctuation (and spacing) between items is often determined by both of the
% items rather than just the first one. The presence of quotation marks
% further complicates the situation because, in standard English, trailing
% punctuation marks are supposed to be contained within the quotes.
%
% IEEEtran.bst maintains two output state (aka "status") vectors which
% correspond to the previous and current (aka "this") items. Each vector
% consists of several independent attributes which track punctuation,
% spacing, quotation, and newlines. Capitalization status is handled by a
% separate scalar because the format routines, not the output routine,
% handle capitalization and, therefore, there is no need to maintain the
% capitalization attribute for both the "previous" and "this" items.
%
% When a format routine adds a new item, it copies the current output status
% vector to the previous output status vector and (usually) resets the
% current (this) output status vector to a "standard status" vector. Using a
% "standard status" vector in this way allows us to redefine what we mean by
% "standard status" at the start of each entry handler and reuse the same
% format routines under the various inter-item separation schemes.
% For example, the standard status vector for the @book entry type may use
% commas for item separators, while the @electronic type may use periods,
% yet both entry handlers exploit many of the exact same format routines.
%
% Because format routines have write access to the output status vector of
% the previous item, they can override the punctuation choices of the
% previous format routine! Therefore, it becomes trivial to implement rules
% such as "Always use a period and a large space before the publisher." By
% pushing the generation of the closing quote mark to the output routine, we
% avoid all the problems caused by having to close a quote before having all
% the information required to determine what the punctuation should be.
%
% The IEEEtran.bst output state system can easily be expanded if needed.
% For instance, it is easy to add a "space.tie" attribute value if the
% bibliography rules mandate that two items have to be joined with an
% unbreakable space.

% Gives each status attribute value its integer code. Every attribute has
% a "no" value of #0; the remaining values count upward from #1.
FUNCTION {initialize.status.constants}
{ #0 'punct.no :=
  #0 'space.no :=
  #0 'quote.no :=
  #0 'cap.no :=
  #0 'nline.no :=
  #1 'punct.comma :=
  #1 'space.normal :=
  #1 'quote.close :=
  #1 'cap.yes :=
  #1 'nline.newblock :=
  #2 'punct.period :=
  #2 'space.large :=
}

% Standard status for comma separated entries: comma, normal space,
% no closing quote, no newblock, next item not capitalized.
FUNCTION {std.status.using.comma}
{ cap.no 'cap.std :=
  nline.no 'nline.std :=
  quote.no 'quote.std :=
  space.normal 'space.std :=
  punct.comma 'punct.std :=
}

% Standard status for period separated entries: period, normal space,
% no closing quote, no newblock, next item capitalized.
FUNCTION {std.status.using.period}
{ cap.yes 'cap.std :=
  nline.no 'nline.std :=
  quote.no 'quote.std :=
  space.normal 'space.std :=
  punct.period 'punct.std :=
}

% Clears both the "previous" and the "this" status vectors and requests
% capitalization of the first item.
FUNCTION {initialize.prev.this.status}
{ punct.no 'prev.status.punct :=
  punct.no 'this.status.punct :=
  space.no 'prev.status.space :=
  space.no 'this.status.space :=
  quote.no 'prev.status.quote :=
  quote.no 'this.status.quote :=
  nline.no 'prev.status.nline :=
  nline.no 'this.status.nline :=
  cap.yes 'status.cap :=
}

% Loads the "this" status vector with the standard status.
FUNCTION {this.status.std}
{ nline.std 'this.status.nline :=
  quote.std 'this.status.quote :=
  space.std 'this.status.space :=
  punct.std 'this.status.punct :=
}

% Loads the capitalization status with its standard value.
FUNCTION {cap.status.std}{ cap.std 'status.cap := }

% Copies the "this" status vector into the "previous" status vector.
FUNCTION {this.to.prev.status}
{ this.status.nline 'prev.status.nline :=
  this.status.quote 'prev.status.quote :=
  this.status.space 'prev.status.space :=
  this.status.punct 'prev.status.punct :=
}


% Logical negation of the integer on the top of the stack.
FUNCTION {not}
{   { #0 }
    { #1 }
  if$
}

% Logical AND of the two integers on the top of the stack.
FUNCTION {and}
{   'skip$
    { pop$ #0 }
  if$
}

% Logical OR of the two integers on the top of the stack.
FUNCTION {or}
{   { pop$ #1 }
    'skip$
  if$
}


% convert the strings "yes" or "no" to #1 or #0 respectively
% (case insensitive); anything else triggers a warning and yields #0
FUNCTION {yes.no.to.int}
{ "l" change.case$ duplicate$
  "yes" =
    { pop$ #1 }
    { duplicate$ "no" =
        { pop$ #0 }
        { "unknown boolean " quote$ * swap$ * quote$ *
          " in " * cite$ * warning$
          #0
        }
      if$
    }
  if$
}


% pushes true if the single char string on the stack is in the
% range of "0" to "9"
FUNCTION {is.num}
{ chr.to.int$
  duplicate$ "0" chr.to.int$ < not
  swap$ "9" chr.to.int$ > not
  and
}

% multiplies the integer on the stack by a factor of 10
% (done by repeated addition: #10 is added once per unit of the input)
FUNCTION {bump.int.mag}
{ #0 'multiresult :=
    { duplicate$ #0 > }
    { #1 -
      multiresult #10 +
      'multiresult :=
    }
  while$
  pop$
  multiresult
}

% converts a single character string on the stack to an integer;
% a non-digit triggers a warning and yields #0
FUNCTION {char.to.integer}
{ duplicate$
  is.num
    { chr.to.int$ "0" chr.to.int$ - }
    {"noninteger character " quote$ * swap$ * quote$ *
          " in integer field of " * cite$ * warning$
    #0
    }
  if$
}

% converts a string on the stack to an integer
% (namesleft holds the length, nameptr the position, numnames the result)
FUNCTION {string.to.integer}
{ duplicate$ text.length$ 'namesleft :=
  #0 'numnames :=
  #1 'nameptr :=
    { nameptr namesleft > not }
    { duplicate$ nameptr #1 substring$
      char.to.integer numnames bump.int.mag +
      'numnames :=
      nameptr #1 + 'nameptr :=
    }
  while$
  pop$
  numnames
}




% The output routines write out the *next* to the top (previous) item on the
% stack, adding punctuation and such as needed. Since IEEEtran.bst maintains
% the output status for the top two items on the stack, these output
% routines have to consider the previous output status (which corresponds to
% the item that is being output).
% Full independent control of punctuation, closing quote marks, spacing,
% and newblock is provided.
%
% "output.nonnull" does not check for the presence of a previous empty
% item.
%
% "output" does check for the presence of a previous empty item and will
% remove an empty item rather than outputting it.
%
% "output.warn" is like "output", but will issue a warning if it detects
% an empty item.

FUNCTION {output.nonnull}
{ swap$
  prev.status.punct punct.comma =
    { "," * }
    'skip$
  if$
  prev.status.punct punct.period =
    'add.period$
    'skip$
  if$
  prev.status.quote quote.close =
    { "''" * }
    'skip$
  if$
  prev.status.space space.normal =
    { " " * }
    'skip$
  if$
  prev.status.space space.large =
    { large.space * }
    'skip$
  if$
  write$
  prev.status.nline nline.newblock =
    { newline$ "\newblock " write$ }
    'skip$
  if$
}

FUNCTION {output}
{ duplicate$ empty$
    'pop$
    'output.nonnull
  if$
}

% Expects the field name (used only in the warning text) on top of the item.
FUNCTION {output.warn}
{ 't :=
  duplicate$ empty$
    { pop$ "empty " t * " in " * cite$ * warning$ }
    'output.nonnull
  if$
}

% "fin.entry" is the output routine that handles the last item of the entry
% (which will be on the top of the stack when "fin.entry" is called).
FUNCTION {fin.entry}
{ this.status.punct punct.no =
    'skip$
    'add.period$
  if$
  this.status.quote quote.close =
    { "''" * }
    'skip$
  if$
  write$
  newline$
}


% Pushes true when add.period$ would append a period to the string on the
% top of the stack; the string itself is left underneath the result.
FUNCTION {is.last.char.not.punct}
{ duplicate$
  "}" * add.period$
  #-1 #1 substring$ "." =
}

% Pushes true when the string on the top of the stack contains a "-", ","
% or "+" character; t is consumed one character at a time.
FUNCTION {is.multiple.pages}
{ 't :=
  #0 'multiresult :=
    { multiresult not
      t empty$ not
      and
    }
    { t #1 #1 substring$
      duplicate$ "-" =
      swap$ duplicate$ "," =
      swap$ "+" =
      or or
        { #1 'multiresult := }
        { t #2 global.max$ substring$ 't := }
      if$
    }
  while$
  multiresult
}

FUNCTION {capitalize}{ "u" change.case$ "t" change.case$ }

% Wraps a nonempty string in \emph{}; an empty one becomes "".
FUNCTION {emphasize}
{ duplicate$ empty$
    { pop$ "" }
    { "\emph{" swap$ * "}" * }
  if$
}

% Wraps the string in the user's name.latex.cmd, if one has been set.
FUNCTION {do.name.latex.cmd}
{ name.latex.cmd
  empty$
    'skip$
    { name.latex.cmd "{" * swap$ * "}" * }
  if$
}

% IEEEtran.bst uses its own \BIBforeignlanguage command which directly
% invokes the TeX hyphenation patterns without the need of the Babel
% package. Babel does a lot more than switch hyphenation patterns and
% its loading can cause unintended effects in many class files (such as
% IEEEtran.cls).
FUNCTION {select.language}
{ duplicate$ empty$
    'pop$
    { language empty$
        'skip$
        { "\BIBforeignlanguage{" language * "}{" * swap$ * "}" * }
      if$
    }
  if$
}

% Pushes "~" underneath a string shorter than three characters,
% otherwise " ".
FUNCTION {tie.or.space.prefix}
{ duplicate$ text.length$ #3 <
    { "~" }
    { " " }
  if$
  swap$
}

% Chooses the plural abbreviation when there is more than one editor.
FUNCTION {get.bbl.editor}
{ editor num.names$ #1 >
    'bbl.editors
    'bbl.editor
  if$
}

% Surrounds the string on the top of the stack with single spaces.
FUNCTION {space.word}{ " " swap$ * " " * }


% Field Conditioners, Converters, Checkers and External Interfaces

FUNCTION {empty.field.to.null.string}
{ duplicate$ empty$
    { pop$ "" }
    'skip$
  if$
}

FUNCTION {either.or.check}
{ empty$
    'pop$
    { "can't use both " swap$ * " fields in " * cite$ * warning$ }
  if$
}

% Warns when author, title, howpublished, month, year, note and url are
% all empty.
FUNCTION {empty.entry.warn}
{ author empty$
  title empty$ and
  howpublished empty$ and
  month empty$ and
  year empty$ and
  note empty$ and
  url empty$ and
    { "all relevant fields are empty in " cite$ * warning$ }
    'skip$
  if$
}


% The bibinfo system provides a way for the electronic parsing/acquisition
% of a bibliography's contents as is done by REVTeX.
% For example, a field could be entered into the bibliography as:
% \bibinfo{volume}{2}
% Only the "2" would show up in the document, but the LaTeX \bibinfo command
% could do additional things with the information. IEEEtran.bst does provide
% a \bibinfo command via "\providecommand{\bibinfo}[2]{#2}". However, it is
% currently not used as the bogus bibinfo functions defined here output the
% entry values directly without the \bibinfo wrapper. The bibinfo functions
% themselves (and the calls to them) are retained for possible future use.
%
% bibinfo.check avoids acting on missing fields while bibinfo.warn will
% issue a warning message if a missing field is detected. Prior to calling
% the bibinfo functions, the user should push the field value and then its
% name string, in that order.

% A missing field becomes ""; any other value (empty or not) is returned
% unchanged with its name string discarded. The non-missing branch is where
% a \bibinfo wrapper would be added.
FUNCTION {bibinfo.check}
{ swap$
  duplicate$ missing$
    { pop$ pop$ "" }
    { swap$ pop$ }
  if$
}

% Like bibinfo.check, but warns about a missing or an empty field. A
% missing field becomes ""; an empty one is returned as is.
FUNCTION {bibinfo.warn}
{ swap$
  duplicate$ missing$
    { swap$ "missing " swap$ * " in " * cite$ * warning$
      pop$ ""
    }
    { duplicate$ empty$
        { swap$ "empty " swap$ * " in " * cite$ * warning$ }
        { swap$ pop$ }
      if$
    }
  if$
}


% IEEE separates large numbers with more than 4 digits into groups of
% three. IEEE uses a small space to separate these number groups.
% Typical applications include patent and page numbers.

% number of consecutive digits required to trigger the group separation.
FUNCTION {large.number.trigger}{ #5 }

% For numbers longer than the trigger, this is the blocksize of the groups.
% The blocksize must be less than the trigger threshold, and 2 * blocksize
% must be greater than the trigger threshold (can't do more than one
% separation on the initial trigger).
FUNCTION {large.number.blocksize}{ #3 }

% What is actually inserted between the number groups.
FUNCTION {large.number.separator}{ "\," }

% So as to save on integer variables by reusing existing ones, numnames
% holds the current number of consecutive digits read and nameptr holds
% the number that will trigger an inserted space.
% The input is consumed from its last character towards its first while
% the result is rebuilt on the stack by prepending each character.
FUNCTION {large.number.separate}
{ 't :=
  ""
  #0 'numnames :=
  large.number.trigger 'nameptr :=
  { t empty$ not }
    { t #-1 #1 substring$ is.num
        { numnames #1 + 'numnames := }
        % a non-digit ends the current run and rearms the initial trigger
        { #0 'numnames :=
          large.number.trigger 'nameptr :=
        }
      if$
      % move the last character of t to the front of the result
      t #-1 #1 substring$ swap$ *
      t #-2 global.max$ substring$ 't :=
      numnames nameptr =
        % split the result after its first (nameptr - blocksize) characters
        % and insert the separator there; from then on a separator is due
        % every time blocksize more digits have been read
        { duplicate$ #1 nameptr large.number.blocksize - substring$ swap$
          nameptr large.number.blocksize - #1 + global.max$ substring$
          large.number.separator swap$ * *
          nameptr large.number.blocksize - 'numnames :=
          large.number.blocksize #1 + 'nameptr :=
        }
        { skip$ }
      if$
    }
  while$
}

% Converts all single dashes "-" to double dashes "--".
% The string is first passed through large.number.separate. A run of two
% or more dashes already present in the input is copied through unchanged.
FUNCTION {n.dashify}
{ large.number.separate
  't :=
  ""
    { t empty$ not }
    { t #1 #1 substring$ "-" =
        { t #1 #2 substring$ "--" = not
            { "--" *
              t #2 global.max$ substring$ 't :=
            }
            { { t #1 #1 substring$ "-" = }
                { "-" *
                  t #2 global.max$ substring$ 't :=
                }
              while$
            }
          if$
        }
        { t #1 #1 substring$ *
          t #2 global.max$ substring$ 't :=
        }
      if$
    }
  while$
}


% This function detects entries with names that are identical to that of
% the previous entry and replaces the repeated names with dashes (if the
% "is.dash.repeated.names" user control is nonzero).
% oldname is updated in every case except when dashes are substituted.
FUNCTION {name.or.dash}
{ 's :=
   oldname empty$
     { s 'oldname := s }
     { s oldname =
         { is.dash.repeated.names
              { repeated.name.dashes }
              { s 'oldname := s }
            if$
         }
         { s 'oldname := s }
       if$
     }
   if$
}

% Converts the number string on the top of the stack to
% "numerical ordinal form" (e.g., "7" to "7th"). There is
% no artificial limit to the upper bound of the numbers as the
% two least significant digits determine the ordinal form.
% A tens digit of "1" (11, 12, 13, ...) always takes "th"; otherwise the
% units digit selects "st", "nd", "rd" or "th".
FUNCTION {num.to.ordinal}
{ duplicate$ #-2 #1 substring$ "1" =
    { bbl.th * }
    { duplicate$ #-1 #1 substring$ "1" =
        { bbl.st * }
        { duplicate$ #-1 #1 substring$ "2" =
            { bbl.nd * }
            { duplicate$ #-1 #1 substring$ "3" =
                { bbl.rd * }
                { bbl.th * }
              if$
            }
          if$
        }
      if$
    }
  if$
}

% If the string on the top of the stack begins with a number,
% (e.g., 11th) then replace the string with the leading number
% it contains. Otherwise retain the string as-is. s holds the
% extracted number, t holds the part of the string that remains
% to be scanned.
FUNCTION {extract.num}
{ duplicate$ 't :=
  "" 's :=
    { t empty$ not }
    { t #1 #1 substring$
      t #2 global.max$ substring$ 't :=
      duplicate$ is.num
        { s swap$ * 's := }
        { pop$ "" 't := }
      if$
    }
  while$
  s empty$
    'skip$
    { pop$ s }
  if$
}

% Converts the word number string on the top of the stack to
% Arabic string form. Will be successful up to "tenth".
% The comparison is case insensitive; an unrecognized word is left as is.
FUNCTION {word.to.num}
{ duplicate$ "l" change.case$ 's :=
  s "first" =   { pop$ "1" }  'skip$ if$
  s "second" =  { pop$ "2" }  'skip$ if$
  s "third" =   { pop$ "3" }  'skip$ if$
  s "fourth" =  { pop$ "4" }  'skip$ if$
  s "fifth" =   { pop$ "5" }  'skip$ if$
  s "sixth" =   { pop$ "6" }  'skip$ if$
  s "seventh" = { pop$ "7" }  'skip$ if$
  s "eighth" =  { pop$ "8" }  'skip$ if$
  s "ninth" =   { pop$ "9" }  'skip$ if$
  s "tenth" =   { pop$ "10" } 'skip$ if$
}


% Converts the string on the top of the stack to numerical
% ordinal (e.g., "11th") form.
% An empty string is left untouched. A string starting with a digit is
% reduced to its leading number and given an ordinal suffix. Anything else
% is treated as an ordinal word; if word.to.num cannot translate it, a
% warning is issued and the string is kept as is.
FUNCTION {convert.edition}
{ duplicate$ empty$ 'skip$
    { duplicate$ #1 #1 substring$ is.num
        { extract.num
          num.to.ordinal
        }
        { word.to.num
          duplicate$ #1 #1 substring$ is.num
            { num.to.ordinal }
            { "edition ordinal word " quote$ * edition * quote$ *
              " may be too high (or improper) for conversion" * " in " * cite$ * warning$
            }
          if$
        }
      if$
    }
  if$
}




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% LATEX BIBLIOGRAPHY CODE %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Writes the \bibitem{<cite key>} line, pushes an empty string to serve as
% the initial "previous item", and resets both output status vectors.
FUNCTION {start.entry}
{ newline$
  "\bibitem{" write$
  cite$ write$
  "}" write$
  newline$
  ""
  initialize.prev.this.status
}

% Here we write out all the LaTeX code that we will need. The most involved
% code sequences are those that control the alternate interword spacing and
% foreign language hyphenation patterns. The heavy use of \providecommand
% gives users a way to override the defaults. Special thanks to Javier Bezos,
% Johannes Braams, Robin Fairbairns, Heiko Oberdiek, Donald Arseneau and all
% the other gurus on comp.text.tex for their help and advice on the topic of
% \selectlanguage, Babel and BibTeX.
% Writes the bibliography preamble: a "Generated by" comment line, the
% user's @preamble (if any), \begin{thebibliography} with the widest label,
% and the \providecommand definitions used by the entries.
FUNCTION {begin.bib}
{ "% Generated by IEEEtran.bst, version: " bst.file.version * " (" * bst.file.date * ")" *
  write$ newline$
  preamble$ empty$ 'skip$
    { preamble$ write$ newline$ }
  if$
  "\begin{thebibliography}{" longest.label * "}" *
  write$ newline$
  "\providecommand{\url}[1]{#1}"
  write$ newline$
  "\csname url@samestyle\endcsname"
  write$ newline$
  "\providecommand{\newblock}{\relax}"
  write$ newline$
  "\providecommand{\bibinfo}[2]{#2}"
  write$ newline$
  "\providecommand{\BIBentrySTDinterwordspacing}{\spaceskip=0pt\relax}"
  write$ newline$
  "\providecommand{\BIBentryALTinterwordstretchfactor}{"
  ALTinterwordstretchfactor * "}" *
  write$ newline$
  "\providecommand{\BIBentryALTinterwordspacing}{\spaceskip=\fontdimen2\font plus "
  write$ newline$
  "\BIBentryALTinterwordstretchfactor\fontdimen3\font minus \fontdimen4\font\relax}"
  write$ newline$
  "\providecommand{\BIBforeignlanguage}[2]{{%"
  write$ newline$
  "\expandafter\ifx\csname l@#1\endcsname\relax"
  write$ newline$
  "\typeout{** WARNING: IEEEtran.bst: No hyphenation pattern has been}%"
  write$ newline$
  "\typeout{** loaded for the language `#1'. Using the pattern for}%"
  write$ newline$
  "\typeout{** the default language instead.}%"
  write$ newline$
  "\else"
  write$ newline$
  "\language=\csname l@#1\endcsname"
  write$ newline$
  "\fi"
  write$ newline$
  "#2}}"
  write$ newline$
  "\providecommand{\BIBdecl}{\relax}"
  write$ newline$
  "\BIBdecl"
  write$ newline$
}

% Closes the thebibliography environment.
FUNCTION {end.bib}
{ newline$ "\end{thebibliography}" write$ newline$ }

% Switches to the alternate interword spacing when that control is enabled
% and the entry has a url.
FUNCTION {if.url.alt.interword.spacing}
{ is.use.alt.interword.spacing
     {url empty$ 'skip$ {"\BIBentryALTinterwordspacing" write$ newline$} if$}
     { skip$ }
   if$
}

% Restores the standard interword spacing under the same conditions.
FUNCTION {if.url.std.interword.spacing}
{ is.use.alt.interword.spacing
     {url empty$ 'skip$ {"\BIBentrySTDinterwordspacing" write$ newline$} if$}
     { skip$ }
   if$
}




%%%%%%%%%%%%%%%%%%%%%%%%
%% LONGEST LABEL PASS %%
%%%%%%%%%%%%%%%%%%%%%%%%

FUNCTION {initialize.longest.label}
{ "" 'longest.label :=
  #1 'number.label :=
  #0 'longest.label.width :=
}

% Numbers every entry except those of type "ieeetranbstctl" and remembers
% the widest label (as measured by width$) for \begin{thebibliography}.
FUNCTION {longest.label.pass}
{ type$ "ieeetranbstctl" =
    { skip$ }
    { number.label int.to.str$ 'label :=
      number.label #1 + 'number.label :=
      label width$ longest.label.width >
        { label 'longest.label :=
          label width$ 'longest.label.width :=
        }
        { skip$ }
      if$
    }
  if$
}




%%%%%%%%%%%%%%%%%%%%%
%% FORMAT HANDLERS %%
%%%%%%%%%%%%%%%%%%%%%

%% Lower Level Formats (used by higher level formats)

% Expects the organization or publisher string on the stack (kept in t).
% Builds "address: t, month year" from whichever parts are present and
% warns about an empty year. When t is present, the PREVIOUS item is forced
% to end with a period followed by a large space.
FUNCTION {format.address.org.or.pub.date}
{ 't :=
  ""
  year empty$
    { "empty year in " cite$ * warning$ }
    { skip$ }
  if$
  address empty$ t empty$ and
  year empty$ and month empty$ and
    { skip$ }
    { this.to.prev.status
      this.status.std
      cap.status.std
      address "address" bibinfo.check *
      t empty$
        { skip$ }
        { punct.period 'prev.status.punct :=
          space.large 'prev.status.space :=
          address empty$
            { skip$ }
            { ": " * }
          if$
          t *
        }
      if$
      year empty$ month empty$ and
        { skip$ }
        { t empty$ address empty$ and
            { skip$ }
            { ", " * }
          if$
          month empty$
            { year empty$
                { skip$ }
                { year "year" bibinfo.check * }
              if$
            }
            { month "month" bibinfo.check *
              year empty$
                 { skip$ }
                 { " " * year "year" bibinfo.check * }
              if$
            }
          if$
        }
      if$
    }
  if$
}


% Expects a name list and then its field name string on the stack.
% Formats every name with name.format.string, joining them with commas and
% "and". "et al." is produced for a trailing "others" and when the forced
% et al. controls call for truncating a long list.
FUNCTION {format.names}
{ 'bibinfo :=
  duplicate$ empty$ 'skip$ {
  this.to.prev.status
  this.status.std
  's :=
  "" 't :=
  #1 'nameptr :=
  s num.names$ 'numnames :=
  numnames 'namesleft :=
    { namesleft #0 > }
    { s nameptr
      name.format.string
      format.name$
      bibinfo bibinfo.check
      't :=
      nameptr #1 >
        % forced et al.: pretend the current name is "others" and that it
        % is the last one
        { nameptr num.names.shown.with.forced.et.al #1 + =
          numnames max.num.names.before.forced.et.al >
          is.forced.et.al and and
            { "others" 't :=
              #1 'namesleft :=
            }
            { skip$ }
          if$
          namesleft #1 >
            { ", " * t do.name.latex.cmd * }
            % last name: also recognize a literal "others" in the field
            { s nameptr "{ll}" format.name$ duplicate$ "others" =
                { 't := }
                { pop$ }
              if$
              t "others" =
                { " " * bbl.etal emphasize * }
                { numnames #2 >
                    { "," * }
                    { skip$ }
                  if$
                  bbl.and
                  space.word * t do.name.latex.cmd *
                }
              if$
            }
          if$
        }
        { t do.name.latex.cmd }
      if$
      nameptr #1 + 'nameptr :=
      namesleft #1 - 'namesleft :=
    }
  while$
  cap.status.std
  } if$
}




%% Higher Level Formats

%% addresses/locations

FUNCTION {format.address}
{ address duplicate$ empty$ 'skip$
    { this.to.prev.status
      this.status.std
      cap.status.std
    }
  if$
}



%% author/editor names

FUNCTION {format.authors}{ author "author" format.names }

% Appends ", Ed." or ", Eds." to a nonempty editor list.
FUNCTION {format.editors}
{ editor "editor" format.names duplicate$ empty$ 'skip$
    { ", " *
      get.bbl.editor
      capitalize
      *
    }
  if$
}



%% date

% Produces "month year", "year", or (with a warning) "month" alone; the
% status vectors are left untouched when both fields are empty.
FUNCTION {format.date}
{ month "month" bibinfo.check duplicate$ empty$
  year "year" bibinfo.check duplicate$ empty$
    { swap$ 'skip$
        { this.to.prev.status
          this.status.std
          cap.status.std
         "there's a month but no year in " cite$ * warning$ }
      if$
      *
    }
    { this.to.prev.status
      this.status.std
      cap.status.std
      swap$ 'skip$
        {
          swap$
          " " * swap$
        }
      if$
      *
    }
  if$
}

% Produces "(month, year)", "(year)", or (with a warning) "(month)". The
% item is followed by a normal space without punctuation and the next item
% is capitalized.
FUNCTION {format.date.electronic}
{ month "month" bibinfo.check duplicate$ empty$
  year "year" bibinfo.check duplicate$ empty$
    { swap$
        { pop$ }
        { "there's a month but no year in " cite$ * warning$
        pop$ ")" * "(" swap$ *
        this.to.prev.status
        punct.no 'this.status.punct :=
        space.normal 'this.status.space :=
        quote.no 'this.status.quote :=
        cap.yes 'status.cap :=
        }
      if$
    }
    { swap$
        { swap$ pop$ ")" * "(" swap$ * }
        { "(" swap$ * ", " * swap$ * ")" * }
      if$
    this.to.prev.status
    punct.no 'this.status.punct :=
    space.normal 'this.status.space :=
    quote.no 'this.status.quote :=
    cap.yes 'status.cap :=
    }
  if$
}



%% edition/title

% Note: IEEE considers the edition to be closely associated with
% the title of a book. So, in IEEEtran.bst the edition is normally handled
% within the formatting of the title. The format.edition function is
% retained here for possible future use.
FUNCTION {format.edition}
{ edition duplicate$ empty$ 'skip$
    { this.to.prev.status
      this.status.std
      convert.edition
      status.cap
        { "t" }
        { "l" }
      if$ change.case$
      "edition" bibinfo.check
      "~" * bbl.edition *
      cap.status.std
    }
  if$
}

% This is used to format the booktitle of a conference proceedings.
% Here we use the "intype" field to provide the user a way to
% override the word "in" (e.g., with things like "presented at")
% Use of intype stops the emphasis of the booktitle to indicate that
% we no longer mean the written conference proceedings, but the
% conference itself.
FUNCTION {format.in.booktitle}
{ booktitle "booktitle" bibinfo.check
  duplicate$ empty$ 'skip$
    { this.to.prev.status
      this.status.std
      select.language
      intype missing$
        { emphasize
          bbl.in " " *
        }
        { intype " " * }
      if$
      swap$ *
      cap.status.std
    }
  if$
}

% This is used to format the booktitle of a collection.
% Here the "intype" field is not supported, but "edition" is.
% Produces: in \emph{booktitle}, <ordinal>~ed.
FUNCTION {format.in.booktitle.edition}
{ booktitle "booktitle" bibinfo.check
  duplicate$ empty$ 'skip$
    { this.to.prev.status
      this.status.std
      select.language
      emphasize
      edition empty$ 'skip$
        { ", " *
          edition
          convert.edition
          "l" change.case$
          * "~" * bbl.edition *
        }
      if$
      bbl.in " " * swap$ *
      cap.status.std
    }
  if$
}

% Puts the title into title case and prefixes it with an opening quote
% (``). The closing quote is deferred to the output routine through
% this.status.quote. Trailing punctuation is suppressed when the title
% already ends with punctuation.
FUNCTION {format.article.title}
{ title duplicate$ empty$ 'skip$
    { this.to.prev.status
      this.status.std
      "t" change.case$
    }
  if$
  "title" bibinfo.check
  duplicate$ empty$ 'skip$
    { quote.close 'this.status.quote :=
      is.last.char.not.punct
        { punct.std 'this.status.punct := }
        { punct.no 'this.status.punct := }
      if$
      select.language
      "``" swap$ *
      cap.status.std
    }
  if$
}

% Same as above but without the quote marks.
FUNCTION {format.article.title.electronic}
{ title duplicate$ empty$ 'skip$
    { this.to.prev.status
      this.status.std
      cap.status.std
      "t" change.case$
    }
  if$
  "title" bibinfo.check
  duplicate$ empty$
    { skip$ }
    { select.language }
  if$
}

% Produces: \emph{title}, <ordinal>~ed. and warns about an empty title.
% The case of the edition ordinal follows the capitalization status.
FUNCTION {format.book.title.edition}
{ title "title" bibinfo.check
  duplicate$ empty$
    { "empty title in " cite$ * warning$ }
    { this.to.prev.status
      this.status.std
      select.language
      emphasize
      edition empty$ 'skip$
        { ", " *
          edition
          convert.edition
          status.cap
            { "t" }
            { "l" }
          if$
          change.case$
          * "~" * bbl.edition *
        }
      if$
      cap.status.std
    }
  if$
}

FUNCTION {format.book.title}
{ title "title" bibinfo.check
  duplicate$ empty$ 'skip$
    { this.to.prev.status
      this.status.std
      cap.status.std
      select.language
      emphasize
    }
  if$
}



%% journal

FUNCTION {format.journal}
{ journal duplicate$ empty$ 'skip$
    { this.to.prev.status
      this.status.std
      cap.status.std
      select.language
      emphasize
    }
  if$
}



%% how published

FUNCTION {format.howpublished}
{ howpublished duplicate$ empty$ 'skip$
    { this.to.prev.status
      this.status.std
      cap.status.std
    }
  if$
}



%% institutions/organization/publishers/school

FUNCTION {format.institution}
{ institution duplicate$ empty$ 'skip$
    { this.to.prev.status
      this.status.std
      cap.status.std
    }
  if$
}

FUNCTION {format.organization}
{ organization duplicate$ empty$ 'skip$
    { this.to.prev.status
      this.status.std
      cap.status.std
    }
  if$
}

% The next three functions feed the publisher or the organization into
% format.address.org.or.pub.date; only the first one warns when the field
% is missing or empty.
FUNCTION {format.address.publisher.date}
{ publisher "publisher" bibinfo.warn format.address.org.or.pub.date }

FUNCTION {format.address.publisher.date.nowarn}
{ publisher "publisher" bibinfo.check format.address.org.or.pub.date }

FUNCTION {format.address.organization.date}
{ organization "organization" bibinfo.check format.address.org.or.pub.date }

FUNCTION {format.school}
{ school duplicate$ empty$ 'skip$
    { this.to.prev.status
      this.status.std
      cap.status.std
    }
  if$
}



%% volume/number/series/chapter/pages

% Produces "vol." (capitalized when the capitalization status asks for it)
% followed by a tie or a space and the volume.
FUNCTION {format.volume}
{ volume empty.field.to.null.string
  duplicate$ empty$ 'skip$
    { this.to.prev.status
      this.status.std
      bbl.volume
      status.cap
        { capitalize }
        { skip$ }
      if$
      swap$ tie.or.space.prefix
      "volume" bibinfo.check
      * *
      cap.status.std
    }
  if$
}

% Produces "no." (capitalized when the capitalization status asks for it)
% followed by a tie or a space and the number.
FUNCTION {format.number}
{ number empty.field.to.null.string
  duplicate$ empty$ 'skip$
    { this.to.prev.status
      this.status.std
      status.cap
         { bbl.number capitalize }
         { bbl.number }
       if$
      swap$ tie.or.space.prefix
      "number" bibinfo.check
      * *
      cap.status.std
    }
  if$
}

FUNCTION {format.number.if.use.for.article}
{ is.use.number.for.article
     { format.number }
     { "" }
   if$
}

% IEEE does not seem to tie the series so closely with the volume
% and number as is done in other bibliography styles. Instead the
% series is treated somewhat like an extension of the title.
FUNCTION {format.series}
{ series empty$
   { "" }
   { this.to.prev.status
     this.status.std
     bbl.series " " *
     series "series" bibinfo.check *
     cap.status.std
   }
 if$
}


% Uses the lowercased type field in place of "ch." when type is given.
FUNCTION {format.chapter}
{ chapter empty$
    { "" }
    { this.to.prev.status
      this.status.std
      type empty$
        { bbl.chapter }
        { type "l" change.case$
          "type" bibinfo.check
        }
      if$
      chapter tie.or.space.prefix
      "chapter" bibinfo.check
      * *
      cap.status.std
    }
  if$
}


% The intended use of format.paper is for paper numbers of inproceedings.
% The paper type can be overridden via the type field.
% We allow the type to be displayed even if the paper number is absent
% for things like "postdeadline paper"
FUNCTION {format.paper}
{ is.use.paper
     { paper empty$
        { type empty$
            { "" }
            { this.to.prev.status
              this.status.std
              type "type" bibinfo.check
              cap.status.std
            }
          if$
        }
        { this.to.prev.status
          this.status.std
          type empty$
            { bbl.paper }
            { type "type" bibinfo.check }
          if$
          " " * paper
          "paper" bibinfo.check
          *
          cap.status.std
        }
      if$
     }
     { "" }
   if$
}


% Uses "pp." and dashifies the range when the field looks like multiple
% pages, otherwise "p." with the field as is.
FUNCTION {format.pages}
{ pages duplicate$ empty$ 'skip$
    { this.to.prev.status
      this.status.std
      duplicate$ is.multiple.pages
        {
          bbl.pages swap$
          n.dashify
        }
        {
          bbl.page swap$
        }
      if$
      tie.or.space.prefix
      "pages" bibinfo.check
      * *
      cap.status.std
    }
  if$
}



%% technical report number

% Produces the type (default "Tech. Rep.") followed, when a number is
% given, by a tie or a space and the number. Note that the status vectors
% are updated unconditionally.
FUNCTION {format.tech.report.number}
{ number "number" bibinfo.check
  this.to.prev.status
  this.status.std
  cap.status.std
  type duplicate$ empty$
    { pop$
      bbl.techrep
    }
    { skip$ }
  if$
  "type" bibinfo.check
  swap$ duplicate$ empty$
    { pop$ }
    { tie.or.space.prefix * * }
  if$
}



%% note

% The first character of the note follows the capitalization status unless
% it is an opening brace. The note always ends with a period and the item
% after it is capitalized.
FUNCTION {format.note}
{ note empty$
    { "" }
    { this.to.prev.status
      this.status.std
      punct.period 'this.status.punct :=
      note #1 #1 substring$
      duplicate$ "{" =
        { skip$ }
        { status.cap
          { "u" }
          { "l" }
        if$
        change.case$
        }
      if$
      note #2 global.max$ substring$ * "note" bibinfo.check
      cap.yes 'status.cap :=
    }
  if$
}



%% patent

% Produces "month day, year". The month/day/year fields are used when year
% is present; otherwise the monthfiled/dayfiled/yearfiled fields are used.
% Warns about a day without a month and about the absence of any year.
FUNCTION {format.patent.date}
{ this.to.prev.status
  this.status.std
  year empty$
    { monthfiled duplicate$ empty$
        { "monthfiled" bibinfo.check pop$ "" }
        { "monthfiled" bibinfo.check }
      if$
      dayfiled duplicate$ empty$
        { "dayfiled" bibinfo.check pop$ "" * }
        { "dayfiled" bibinfo.check
          monthfiled empty$
             { "dayfiled without a monthfiled in " cite$ * warning$
               *
             }
             { " " swap$ * * }
           if$
        }
      if$
      yearfiled empty$
        { "no year or yearfiled in " cite$ * warning$ }
        { yearfiled "yearfiled" bibinfo.check
          swap$
          duplicate$ empty$
             { pop$ }
             { ", " * swap$ * }
           if$
        }
      if$
    }
    { month duplicate$ empty$
        { "month" bibinfo.check pop$ "" }
        { "month" bibinfo.check }
      if$
      day duplicate$ empty$
        { "day" bibinfo.check pop$ "" * }
        { "day" bibinfo.check
          month empty$
             { "day without a month in " cite$ * warning$
               *
             }
             { " " swap$ * * }
           if$
        }
      if$
      year "year" bibinfo.check
      swap$
      duplicate$ empty$
        { pop$ }
        { ", " * swap$ * }
      if$
    }
  if$
  cap.status.std
}

% Produces "<nationality> <type> <number>". A nationality of "united
% states" (any case) is abbreviated to "U.S."; type defaults to "Patent";
% the number gets its digit groups separated. Warns about an empty
% nationality or number.
FUNCTION {format.patent.nationality.type.number}
{ this.to.prev.status
  this.status.std
  nationality duplicate$ empty$
    { "nationality" bibinfo.warn pop$ "" }
    { "nationality" bibinfo.check
      duplicate$ "l" change.case$ "united states" =
        { pop$ bbl.patentUS }
        { skip$ }
      if$
      " " *
    }
  if$
  type empty$
    { bbl.patent "type" bibinfo.check }
    { type "type" bibinfo.check }
  if$
  *
  number duplicate$ empty$
    { "number" bibinfo.warn pop$ }
    { "number" bibinfo.check
      large.number.separate
      swap$ " " * swap$ *
    }
  if$
  cap.status.std
}



%% standard

% Produces "<organization or institution> <type> <number>". The institution
% is used only when the organization is empty; type defaults to "Std.".
FUNCTION {format.organization.institution.standard.type.number}
{ this.to.prev.status
  this.status.std
  organization duplicate$ empty$
    { pop$
      institution duplicate$ empty$
        { "institution" bibinfo.warn }
        { "institution" bibinfo.warn " " * }
      if$
    }
    { "organization" bibinfo.warn " " * }
  if$
  type empty$
    { bbl.standard "type" bibinfo.check }
    { type "type" bibinfo.check }
  if$
  *
  number duplicate$ empty$
    { "number" bibinfo.check pop$ }
    { "number" bibinfo.check
      large.number.separate
      swap$ " " * swap$ *
    }
  if$
  cap.status.std
}

FUNCTION {format.revision}
{ revision empty$
    { "" }
    { this.to.prev.status
      this.status.std
      bbl.revision
      revision tie.or.space.prefix
      "revision" bibinfo.check
      * *
      cap.status.std
    }
  if$
}


%% thesis

FUNCTION {format.master.thesis.type}
{ this.to.prev.status
  this.status.std
  type empty$
    {
      bbl.mthesis
    }
    {
      type "type" bibinfo.check
    }
  if$
cap.status.std
}

FUNCTION {format.phd.thesis.type}
{ this.to.prev.status
  this.status.std
  type empty$
    {
      bbl.phdthesis
    }
    {
      type "type" bibinfo.check
    }
  if$
cap.status.std
}



%% URL

% Produces "<url prefix> \url{...}". The item BEFORE the URL is forced to
% end with a period and a normal space, while the URL itself gets no
% trailing punctuation.
FUNCTION {format.url}
{ url empty$
    { "" }
    { this.to.prev.status
      this.status.std
      cap.yes 'status.cap :=
      name.url.prefix " " *
      "\url{" * url * "}" *
      punct.no 'this.status.punct :=
      punct.period 'prev.status.punct :=
      space.normal 'this.status.space :=
      space.normal 'prev.status.space :=
      quote.no 'this.status.quote :=
    }
  if$
}




%%%%%%%%%%%%%%%%%%%%
%% ENTRY HANDLERS %%
%%%%%%%%%%%%%%%%%%%%


% Note: In many journals, IEEE (or the authors) tend not to show the number
% for articles, so the display of the number is controlled here by the
% switch "is.use.number.for.article"
FUNCTION {article}
{ std.status.using.comma
  start.entry
  if.url.alt.interword.spacing
  format.authors "author" output.warn
  name.or.dash
  format.article.title "title" output.warn
  format.journal "journal" bibinfo.check "journal" output.warn
  format.volume output
  format.number.if.use.for.article output
  format.pages output
  format.date "year" output.warn
  format.note output
  format.url output
  fin.entry
  if.url.std.interword.spacing
}

% The editors take the place of the authors when there are no authors;
% otherwise they are listed after the series.
FUNCTION {book}
{ std.status.using.comma
  start.entry
  if.url.alt.interword.spacing
  author empty$
    { format.editors "author and editor" output.warn }
    { format.authors output.nonnull }
  if$
  name.or.dash
  format.book.title.edition output
  format.series output
  author empty$
    { skip$ }
    { format.editors output }
  if$
  format.address.publisher.date output
  format.volume output
  format.number output
  format.note output
  format.url output
  fin.entry
  if.url.std.interword.spacing
}

FUNCTION {booklet}
{ std.status.using.comma
  start.entry
  if.url.alt.interword.spacing
  format.authors output
  name.or.dash
  format.article.title "title" output.warn
  format.howpublished "howpublished" bibinfo.check output
  format.organization "organization" bibinfo.check output
  format.address "address" bibinfo.check output
  format.date output
  format.note output
  format.url output
  fin.entry
  if.url.std.interword.spacing
}

FUNCTION {electronic}
{ std.status.using.period
  start.entry
  if.url.alt.interword.spacing
  format.authors output
  name.or.dash
  format.date.electronic output
  format.article.title.electronic output
  format.howpublished "howpublished" bibinfo.check output
  format.organization "organization" bibinfo.check output
  format.address "address" bibinfo.check output
format.note output format.url output fin.entry empty.entry.warn if.url.std.interword.spacing } FUNCTION {inbook} { std.status.using.comma start.entry if.url.alt.interword.spacing author empty$ { format.editors "author and editor" output.warn } { format.authors output.nonnull } if$ name.or.dash format.book.title.edition output format.series output format.address.publisher.date output format.volume output format.number output format.chapter output format.pages output format.note output format.url output fin.entry if.url.std.interword.spacing } FUNCTION {incollection} { std.status.using.comma start.entry if.url.alt.interword.spacing format.authors "author" output.warn name.or.dash format.article.title "title" output.warn format.in.booktitle.edition "booktitle" output.warn format.series output format.editors output format.address.publisher.date.nowarn output format.volume output format.number output format.chapter output format.pages output format.note output format.url output fin.entry if.url.std.interword.spacing } FUNCTION {inproceedings} { std.status.using.comma start.entry if.url.alt.interword.spacing format.authors "author" output.warn name.or.dash format.article.title "title" output.warn format.in.booktitle "booktitle" output.warn format.series output format.editors output format.volume output format.number output publisher empty$ { format.address.organization.date output } { format.organization "organization" bibinfo.check output format.address.publisher.date output } if$ format.paper output format.pages output format.note output format.url output fin.entry if.url.std.interword.spacing } FUNCTION {manual} { std.status.using.comma start.entry if.url.alt.interword.spacing format.authors output name.or.dash format.book.title.edition "title" output.warn format.howpublished "howpublished" bibinfo.check output format.organization "organization" bibinfo.check output format.address "address" bibinfo.check output format.date output format.note output format.url output 
fin.entry if.url.std.interword.spacing } FUNCTION {mastersthesis} { std.status.using.comma start.entry if.url.alt.interword.spacing format.authors "author" output.warn name.or.dash format.article.title "title" output.warn format.master.thesis.type output.nonnull format.school "school" bibinfo.warn output format.address "address" bibinfo.check output format.date "year" output.warn format.note output format.url output fin.entry if.url.std.interword.spacing } FUNCTION {misc} { std.status.using.comma start.entry if.url.alt.interword.spacing format.authors output name.or.dash format.article.title output format.howpublished "howpublished" bibinfo.check output format.organization "organization" bibinfo.check output format.address "address" bibinfo.check output format.pages output format.date output format.note output format.url output fin.entry empty.entry.warn if.url.std.interword.spacing } FUNCTION {patent} { std.status.using.comma start.entry if.url.alt.interword.spacing format.authors output name.or.dash format.article.title output format.patent.nationality.type.number output format.patent.date output format.note output format.url output fin.entry empty.entry.warn if.url.std.interword.spacing } FUNCTION {periodical} { std.status.using.comma start.entry if.url.alt.interword.spacing format.editors output name.or.dash format.book.title "title" output.warn format.series output format.volume output format.number output format.organization "organization" bibinfo.check output format.date "year" output.warn format.note output format.url output fin.entry if.url.std.interword.spacing } FUNCTION {phdthesis} { std.status.using.comma start.entry if.url.alt.interword.spacing format.authors "author" output.warn name.or.dash format.article.title "title" output.warn format.phd.thesis.type output.nonnull format.school "school" bibinfo.warn output format.address "address" bibinfo.check output format.date "year" output.warn format.note output format.url output fin.entry 
if.url.std.interword.spacing } FUNCTION {proceedings} { std.status.using.comma start.entry if.url.alt.interword.spacing format.editors output name.or.dash format.book.title "title" output.warn format.series output format.volume output format.number output publisher empty$ { format.address.organization.date output } { format.organization "organization" bibinfo.check output format.address.publisher.date output } if$ format.note output format.url output fin.entry if.url.std.interword.spacing } FUNCTION {standard} { std.status.using.comma start.entry if.url.alt.interword.spacing format.authors output name.or.dash format.book.title "title" output.warn format.howpublished "howpublished" bibinfo.check output format.organization.institution.standard.type.number output format.revision output format.date output format.note output format.url output fin.entry if.url.std.interword.spacing } FUNCTION {techreport} { std.status.using.comma start.entry if.url.alt.interword.spacing format.authors "author" output.warn name.or.dash format.article.title "title" output.warn format.howpublished "howpublished" bibinfo.check output format.institution "institution" bibinfo.warn output format.address "address" bibinfo.check output format.tech.report.number output.nonnull format.date "year" output.warn format.note output format.url output fin.entry if.url.std.interword.spacing } FUNCTION {unpublished} { std.status.using.comma start.entry if.url.alt.interword.spacing format.authors "author" output.warn name.or.dash format.article.title "title" output.warn format.date output format.note "note" output.warn format.url output fin.entry if.url.std.interword.spacing } % The special entry type which provides the user interface to the % BST controls FUNCTION {IEEEtranBSTCTL} { is.print.banners.to.terminal { "** IEEEtran BST control entry " quote$ * cite$ * quote$ * " detected." 
* top$ } { skip$ } if$ CTLuse_article_number empty$ { skip$ } { CTLuse_article_number yes.no.to.int 'is.use.number.for.article := } if$ CTLuse_paper empty$ { skip$ } { CTLuse_paper yes.no.to.int 'is.use.paper := } if$ CTLuse_forced_etal empty$ { skip$ } { CTLuse_forced_etal yes.no.to.int 'is.forced.et.al := } if$ CTLmax_names_forced_etal empty$ { skip$ } { CTLmax_names_forced_etal string.to.integer 'max.num.names.before.forced.et.al := } if$ CTLnames_show_etal empty$ { skip$ } { CTLnames_show_etal string.to.integer 'num.names.shown.with.forced.et.al := } if$ CTLuse_alt_spacing empty$ { skip$ } { CTLuse_alt_spacing yes.no.to.int 'is.use.alt.interword.spacing := } if$ CTLalt_stretch_factor empty$ { skip$ } { CTLalt_stretch_factor 'ALTinterwordstretchfactor := "\renewcommand{\BIBentryALTinterwordstretchfactor}{" ALTinterwordstretchfactor * "}" * write$ newline$ } if$ CTLdash_repeated_names empty$ { skip$ } { CTLdash_repeated_names yes.no.to.int 'is.dash.repeated.names := } if$ CTLname_format_string empty$ { skip$ } { CTLname_format_string 'name.format.string := } if$ CTLname_latex_cmd empty$ { skip$ } { CTLname_latex_cmd 'name.latex.cmd := } if$ CTLname_url_prefix missing$ { skip$ } { CTLname_url_prefix 'name.url.prefix := } if$ num.names.shown.with.forced.et.al max.num.names.before.forced.et.al > { "CTLnames_show_etal cannot be greater than CTLmax_names_forced_etal in " cite$ * warning$ max.num.names.before.forced.et.al 'num.names.shown.with.forced.et.al := } { skip$ } if$ } %%%%%%%%%%%%%%%%%%% %% ENTRY ALIASES %% %%%%%%%%%%%%%%%%%%% FUNCTION {conference}{inproceedings} FUNCTION {online}{electronic} FUNCTION {internet}{electronic} FUNCTION {webpage}{electronic} FUNCTION {www}{electronic} FUNCTION {default.type}{misc} %%%%%%%%%%%%%%%%%% %% MAIN PROGRAM %% %%%%%%%%%%%%%%%%%% READ EXECUTE {initialize.controls} EXECUTE {initialize.status.constants} EXECUTE {banner.message} EXECUTE {initialize.longest.label} ITERATE {longest.label.pass} EXECUTE {begin.bib} ITERATE 
{call.type$} EXECUTE {end.bib} EXECUTE{completed.message} %% That's all folks, mds. ViennaCL-1.5.1-src/doc/manual/viennacl.tex000644 001750 001750 00000010725 12267304413 020332 0ustar00rupprupp000000 000000 %\documentclass[11pt]{article} \documentclass[11pt]{report} \usepackage{amsmath,amssymb} \usepackage{newcent} \usepackage{pstricks} \usepackage{fancyhdr} \usepackage[dvips]{graphicx} \usepackage{makeidx} \usepackage{psfrag} \usepackage{alltt} \usepackage{index} \usepackage{fancyvrb} \usepackage{pst-blur} \usepackage{pst-grad} \usepackage{epsfig} %\usepackage{subfig} \usepackage{subfigure} \usepackage[toc,page]{appendix} \usepackage[pdfauthor={Karl Rupp et al.}, pdftitle={ViennaCL 1.5.1 Manual}, colorlinks=true, linktocpage=true]{hyperref} %% Listings package START \usepackage{color} \usepackage{listings} \definecolor{darkblue}{rgb}{0,0,.6} \definecolor{darkred}{rgb}{.6,0,0} \definecolor{darkgreen}{rgb}{0,.6,0} \definecolor{red}{rgb}{.98,0,0} \definecolor{lightgrey}{rgb}{0.98,0.98,0.98} \lstloadlanguages{C++} \lstset{% language=C++, basicstyle=\small\ttfamily, commentstyle=\itshape\color{darkgreen}, keywordstyle=\bfseries\color{darkblue}, stringstyle=\color{darkred}, showspaces=false, showtabs=false, columns=fixed, backgroundcolor=\color{lightgrey}, numbers=none, frame=single, numberstyle=\tiny, breaklines=true, showstringspaces=false, xleftmargin=0.1cm }% %% Listings package STOP %Keywords and Setup \newcommand{\CMake} {\texttt{CMake}} \newcommand{\OpenMP} {\texttt{OpenMP}} \newcommand{\OpenCL} {\texttt{OpenCL}} \newcommand{\CUDA} {\texttt{CUDA}} \newcommand{\ViennaCL} {\texttt{ViennaCL}} \newcommand{\ViennaCLversion} {\texttt{ViennaCL 1.5.1}} \newcommand{\ViennaCLminorversion} {\texttt{ViennaCL 1.5.x}} \newcommand{\Boost} {\texttt{Boost}} \newcommand{\ublas} {\texttt{uBLAS}} \newcommand{\Eigen} {\texttt{Eigen}} \newcommand{\MTL} {\texttt{MTL 4}} \newcommand{\GCC} {\texttt{GCC}} \newcommand{\NVIDIA} { {NVIDIA} } \include{keywords} % [KR] This is a mess, we 
should delete unused Minimos-stuff \include{setup} \makeatletter \renewcommand\@endpart{\vfil \if@twoside \null \thispagestyle{empty}% \newpage \fi \if@tempswa \twocolumn \fi} \makeatother \begin{document} \pagenumbering{roman} \include{cover} \include{contributors} \clearpage \addtocontents{toc}{\protect\setcounter{tocdepth}{1}} \tableofcontents %\label{s:ipl:content} \index{IPL!content} \clearpage \pagenumbering{arabic} \include{introduction} \include{installation} %%%%%%%%%%%%%%%% Core Functionality %%%%%%%%%%%%%%%% \part{Core Functionality} The {\ViennaCL} core consists of operations and algorithms which are available on all three computing backends ({\CUDA}, host-based, {\OpenCL}). These features are considered stable and full support is provided. However, note that performance characteristics may differ considerably between the computing backends. In particular, the use of GPUs will not pay off if the data set is so small that PCI-Express latency dominates the execution time. \include{types} \include{operations} \include{algorithms} \include{other-libs} \include{memory-model} \include{shared-lib} %%%%%%%%%%%%%%% Addon Functionality %%%%%%%%%%%%%%%% \part{Addon Functionality} With the introduction of host-based, {\CUDA}- and {\OpenCL}-enabled computing backends in {\ViennaCL} 1.4.0, certain functionality is not available for all three backends and is listed in the following. For example, the {\OpenCL} kernel generator only makes sense in the {\OpenCL} computing backend, thus this functionality has been moved out of the set of core functionality. Also, certain functionality is still in an experimental stage and might experience interface changes. Although all functionality flagged as experimental and listed in this section passes a respective set of tests, library users are advised to use it with extra care and to be prepared for interface changes when upgrading to a newer version of {\ViennaCL}. 
\include{additional-algorithms} \include{multi-device} \include{custom-kernels} \include{custom-contexts} %\include{kernel-generation} %\include{tuning} \include{structured-matrices} %%%%%%%%%%%%%%% Addon Functionality %%%%%%%%%%%%%%%% \part{Miscellaneous} \include{design} % Appendix %\appendix %\appendixpage %\addappheadtotoc \begin{appendices} \include{versioning} \include{changelogs} \include{license} \end{appendices} %\section{Bibliography} \bibliographystyle{IEEEtran_v1.13} \addcontentsline{toc}{chapter}{Bibliography} \bibliography{viennacl} %\cleardoublepage %\phantomsection %\addcontentsline{toc}{chapter}{Index} \printindex \end{document} ViennaCL-1.5.1-src/doc/manual/contributors.tex000644 001750 001750 00000001671 12267307266 021301 0ustar00rupprupp000000 000000 \clearpage Copyright {\copyright} 2010--2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. \vspace{2.cm} \textit{Project Head:}\\ Karl Rupp\\ \vspace{2.cm} \textit{Code Contributors:} \\ Evan Bollig \\ Alex Christensen (BYU) \\ Philipp Grabenweger \\ Volodymyr Kysenko \\ Nikolay Lukash \\ G\"unther Mader \\ Vittorio Patriarca \\ Florian Rudolf \\ Astrid Rupp \\ Toby St Clere Smithe \\ Philippe Tillet \\ Markus Wagner \\ Josef Weinbub \\ Michael Wild \\ \vspace{3.5cm} Institute for Microelectronics\newline Vienna University of Technology\newline Gu\ss hausstra\ss e 27-29 / E360\newline A-1040 Vienna, Austria/Europe\newline \begin{tabular}{ll} Phone & +43-1-58801-36001\\ FAX & +43-1-58801-36099\\ Web & \texttt{http://www.iue.tuwien.ac.at/} \end{tabular} ViennaCL-1.5.1-src/libviennacl/000755 001750 001750 00000000000 12267307531 016254 5ustar00rupprupp000000 000000 ViennaCL-1.5.1-src/libviennacl/include/000755 001750 001750 00000000000 12267307531 017677 5ustar00rupprupp000000 000000 ViennaCL-1.5.1-src/libviennacl/include/viennacl.hpp000644 001750 001750 00000121207 12267307531 022212 
0ustar00rupprupp000000 000000 #ifndef VIENNACL_VIENNACL_HPP #define VIENNACL_VIENNACL_HPP /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include #ifdef VIENNACL_WITH_OPENCL #ifdef __APPLE__ #include #else #include #endif #endif // Extra export declarations when building with Visual Studio: #if defined(_MSC_VER) #if defined(viennacl_EXPORTS) #define VIENNACL_EXPORTED_FUNCTION __declspec(dllexport) #else #define VIENNACL_EXPORTED_FUNCTION __declspec(dllimport) #endif /* viennacl_EXPORTS */ #else /* defined (_MSC_VER) */ #define VIENNACL_EXPORTED_FUNCTION #endif #ifdef __cplusplus extern "C" { #endif typedef int ViennaCLInt; /************** Enums ***************/ typedef enum { ViennaCLCUDA, ViennaCLOpenCL, ViennaCLHost } ViennaCLBackendTypes; typedef enum { ViennaCLRowMajor, ViennaCLColumnMajor } ViennaCLOrder; typedef enum { ViennaCLNoTrans, ViennaCLTrans } ViennaCLTranspose; typedef enum { ViennaCLUpper, ViennaCLLower } ViennaCLUplo; typedef enum { ViennaCLUnit, ViennaCLNonUnit } ViennaCLDiag; typedef enum { ViennaCLFloat, ViennaCLDouble } ViennaCLPrecision; // Error codes: typedef enum { ViennaCLSuccess = 0, ViennaCLGenericFailure } ViennaCLStatus; /************* Backend Management ******************/ /** @brief Generic backend for CUDA, OpenCL, host-based stuff */ struct ViennaCLBackend_impl; typedef ViennaCLBackend_impl* ViennaCLBackend; VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendCreate(ViennaCLBackend * backend); 
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendSetOpenCLContextID(ViennaCLBackend backend, ViennaCLInt context_id); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendDestroy(ViennaCLBackend * backend); /******** User Types **********/ struct ViennaCLHostScalar_impl; typedef ViennaCLHostScalar_impl* ViennaCLHostScalar; struct ViennaCLScalar_impl; typedef ViennaCLScalar_impl* ViennaCLScalar; struct ViennaCLVector_impl; typedef ViennaCLVector_impl* ViennaCLVector; struct ViennaCLMatrix_impl; typedef ViennaCLMatrix_impl* ViennaCLMatrix; /******************** BLAS Level 1 ***********************/ // IxASUM VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLiamax(ViennaCLInt *alpha, ViennaCLVector x); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDAiSamax(ViennaCLBackend backend, ViennaCLInt n, ViennaCLInt *alpha, float *x, ViennaCLInt offx, ViennaCLInt incx); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDAiDamax(ViennaCLBackend backend, ViennaCLInt n, ViennaCLInt *alpha, double *x, ViennaCLInt offx, ViennaCLInt incx); #ifdef VIENNACL_WITH_OPENCL VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLiSamax(ViennaCLBackend backend, ViennaCLInt n, ViennaCLInt *alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLiDamax(ViennaCLBackend backend, ViennaCLInt n, ViennaCLInt *alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx); #endif VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostiSamax(ViennaCLBackend backend, ViennaCLInt n, ViennaCLInt *alpha, float *x, ViennaCLInt offx, ViennaCLInt incx); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostiDamax(ViennaCLBackend backend, ViennaCLInt n, ViennaCLInt *alpha, double *x, ViennaCLInt offx, ViennaCLInt incx); // xASUM VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLasum(ViennaCLHostScalar *alpha, ViennaCLVector x); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASasum(ViennaCLBackend backend, ViennaCLInt n, float *alpha, float 
*x, ViennaCLInt offx, ViennaCLInt incx); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADasum(ViennaCLBackend backend, ViennaCLInt n, double *alpha, double *x, ViennaCLInt offx, ViennaCLInt incx); #ifdef VIENNACL_WITH_OPENCL VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSasum(ViennaCLBackend backend, ViennaCLInt n, float *alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDasum(ViennaCLBackend backend, ViennaCLInt n, double *alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx); #endif VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSasum(ViennaCLBackend backend, ViennaCLInt n, float *alpha, float *x, ViennaCLInt offx, ViennaCLInt incx); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDasum(ViennaCLBackend backend, ViennaCLInt n, double *alpha, double *x, ViennaCLInt offx, ViennaCLInt incx); // xAXPY VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLaxpy(ViennaCLHostScalar alpha, ViennaCLVector x, ViennaCLVector y); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASaxpy(ViennaCLBackend backend, ViennaCLInt n, float alpha, float *x, ViennaCLInt offx, ViennaCLInt incx, float *y, ViennaCLInt offy, ViennaCLInt incy); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADaxpy(ViennaCLBackend backend, ViennaCLInt n, double alpha, double *x, ViennaCLInt offx, ViennaCLInt incx, double *y, ViennaCLInt offy, ViennaCLInt incy); #ifdef VIENNACL_WITH_OPENCL VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSaxpy(ViennaCLBackend backend, ViennaCLInt n, float alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDaxpy(ViennaCLBackend backend, ViennaCLInt n, double alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy); #endif VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSaxpy(ViennaCLBackend backend, ViennaCLInt n, float alpha, 
float *x, ViennaCLInt offx, ViennaCLInt incx, float *y, ViennaCLInt offy, ViennaCLInt incy); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDaxpy(ViennaCLBackend backend, ViennaCLInt n, double alpha, double *x, ViennaCLInt offx, ViennaCLInt incx, double *y, ViennaCLInt offy, ViennaCLInt incy); // xCOPY VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLcopy(ViennaCLVector x, ViennaCLVector y); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDAScopy(ViennaCLBackend backend, ViennaCLInt n, float *x, ViennaCLInt offx, ViennaCLInt incx, float *y, ViennaCLInt offy, ViennaCLInt incy); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADcopy(ViennaCLBackend backend, ViennaCLInt n, double *x, ViennaCLInt offx, ViennaCLInt incx, double *y, ViennaCLInt offy, ViennaCLInt incy); #ifdef VIENNACL_WITH_OPENCL VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLScopy(ViennaCLBackend backend, ViennaCLInt n, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDcopy(ViennaCLBackend backend, ViennaCLInt n, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy); #endif VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostScopy(ViennaCLBackend backend, ViennaCLInt n, float *x, ViennaCLInt offx, ViennaCLInt incx, float *y, ViennaCLInt offy, ViennaCLInt incy); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDcopy(ViennaCLBackend backend, ViennaCLInt n, double *x, ViennaCLInt offx, ViennaCLInt incx, double *y, ViennaCLInt offy, ViennaCLInt incy); // xDOT VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLdot(ViennaCLHostScalar *alpha, ViennaCLVector x, ViennaCLVector y); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASdot(ViennaCLBackend backend, ViennaCLInt n, float *alpha, float *x, ViennaCLInt offx, ViennaCLInt incx, float *y, ViennaCLInt offy, ViennaCLInt incy); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus 
ViennaCLCUDADdot(ViennaCLBackend backend, ViennaCLInt n, double *alpha, double *x, ViennaCLInt offx, ViennaCLInt incx, double *y, ViennaCLInt offy, ViennaCLInt incy); #ifdef VIENNACL_WITH_OPENCL VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSdot(ViennaCLBackend backend, ViennaCLInt n, float *alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDdot(ViennaCLBackend backend, ViennaCLInt n, double *alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy); #endif VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSdot(ViennaCLBackend backend, ViennaCLInt n, float *alpha, float *x, ViennaCLInt offx, ViennaCLInt incx, float *y, ViennaCLInt offy, ViennaCLInt incy); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDdot(ViennaCLBackend backend, ViennaCLInt n, double *alpha, double *x, ViennaCLInt offx, ViennaCLInt incx, double *y, ViennaCLInt offy, ViennaCLInt incy); // xNRM2 VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLnrm2(ViennaCLHostScalar *alpha, ViennaCLVector x); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASnrm2(ViennaCLBackend backend, ViennaCLInt n, float *alpha, float *x, ViennaCLInt offx, ViennaCLInt incx); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADnrm2(ViennaCLBackend backend, ViennaCLInt n, double *alpha, double *x, ViennaCLInt offx, ViennaCLInt incx); #ifdef VIENNACL_WITH_OPENCL VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSnrm2(ViennaCLBackend backend, ViennaCLInt n, float *alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDnrm2(ViennaCLBackend backend, ViennaCLInt n, double *alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx); #endif VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSnrm2(ViennaCLBackend backend, ViennaCLInt n, float *alpha, float *x, ViennaCLInt offx, ViennaCLInt incx); 
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDnrm2(ViennaCLBackend backend, ViennaCLInt n, double *alpha, double *x, ViennaCLInt offx, ViennaCLInt incx); // xROT VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLrot(ViennaCLVector x, ViennaCLVector y, ViennaCLHostScalar c, ViennaCLHostScalar s); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASrot(ViennaCLBackend backend, ViennaCLInt n, float *x, ViennaCLInt offx, ViennaCLInt incx, float *y, ViennaCLInt offy, ViennaCLInt incy, float c, float s); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADrot(ViennaCLBackend backend, ViennaCLInt n, double *x, ViennaCLInt offx, ViennaCLInt incx, double *y, ViennaCLInt offy, ViennaCLInt incy, double c, double s); #ifdef VIENNACL_WITH_OPENCL VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSrot(ViennaCLBackend backend, ViennaCLInt n, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy, float c, float s); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDrot(ViennaCLBackend backend, ViennaCLInt n, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy, double c, double s); #endif VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSrot(ViennaCLBackend backend, ViennaCLInt n, float *x, ViennaCLInt offx, ViennaCLInt incx, float *y, ViennaCLInt offy, ViennaCLInt incy, float c, float s); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDrot(ViennaCLBackend backend, ViennaCLInt n, double *x, ViennaCLInt offx, ViennaCLInt incx, double *y, ViennaCLInt offy, ViennaCLInt incy, double c, double s); // xSCAL VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLscal(ViennaCLHostScalar alpha, ViennaCLVector x); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASscal(ViennaCLBackend backend, ViennaCLInt n, float alpha, float *x, ViennaCLInt offx, ViennaCLInt incx); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADscal(ViennaCLBackend backend, ViennaCLInt n, double alpha, 
double *x, ViennaCLInt offx, ViennaCLInt incx); #ifdef VIENNACL_WITH_OPENCL VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSscal(ViennaCLBackend backend, ViennaCLInt n, float alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDscal(ViennaCLBackend backend, ViennaCLInt n, double alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx); #endif VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSscal(ViennaCLBackend backend, ViennaCLInt n, float alpha, float *x, ViennaCLInt offx, ViennaCLInt incx); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDscal(ViennaCLBackend backend, ViennaCLInt n, double alpha, double *x, ViennaCLInt offx, ViennaCLInt incx); // xSWAP VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLswap(ViennaCLVector x, ViennaCLVector y); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASswap(ViennaCLBackend backend, ViennaCLInt n, float *x, ViennaCLInt offx, ViennaCLInt incx, float *y, ViennaCLInt offy, ViennaCLInt incy); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADswap(ViennaCLBackend backend, ViennaCLInt n, double *x, ViennaCLInt offx, ViennaCLInt incx, double *y, ViennaCLInt offy, ViennaCLInt incy); #ifdef VIENNACL_WITH_OPENCL VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSswap(ViennaCLBackend backend, ViennaCLInt n, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDswap(ViennaCLBackend backend, ViennaCLInt n, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy); #endif VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSswap(ViennaCLBackend backend, ViennaCLInt n, float *x, ViennaCLInt offx, ViennaCLInt incx, float *y, ViennaCLInt offy, ViennaCLInt incy); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDswap(ViennaCLBackend backend, ViennaCLInt n, double *x, ViennaCLInt offx, ViennaCLInt incx, double *y, 
ViennaCLInt offy, ViennaCLInt incy); /******************** BLAS Level 2 ***********************/ // xGEMV: y <- alpha * Ax + beta * y VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLgemv(ViennaCLHostScalar alpha, ViennaCLMatrix A, ViennaCLVector x, ViennaCLHostScalar beta, ViennaCLVector y); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, float alpha, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, float *x, ViennaCLInt offx, ViennaCLInt incx, float beta, float *y, ViennaCLInt offy, ViennaCLInt incy); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, double alpha, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, double *x, ViennaCLInt offx, ViennaCLInt incx, double beta, double *y, ViennaCLInt offy, ViennaCLInt incy); #ifdef VIENNACL_WITH_OPENCL VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, float alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, float beta, cl_mem y, ViennaCLInt offy, ViennaCLInt incy); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, double alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, double beta, cl_mem y, ViennaCLInt offy, ViennaCLInt incy); #endif VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSgemv(ViennaCLBackend 
backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, float alpha, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, float *x, ViennaCLInt offx, ViennaCLInt incx, float beta, float *y, ViennaCLInt offy, ViennaCLInt incy); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, double alpha, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, double *x, ViennaCLInt offx, ViennaCLInt incx, double beta, double *y, ViennaCLInt offy, ViennaCLInt incy); // xTRSV: Ax <- x VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLtrsv(ViennaCLMatrix A, ViennaCLVector x, ViennaCLUplo uplo); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDAStrsv(ViennaCLBackend backend, ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLDiag diag, ViennaCLInt n, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, float *x, ViennaCLInt offx, ViennaCLInt incx); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADtrsv(ViennaCLBackend backend, ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLDiag diag, ViennaCLInt n, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, double *x, ViennaCLInt offx, ViennaCLInt incx); #ifdef VIENNACL_WITH_OPENCL VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLStrsv(ViennaCLBackend backend, ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLDiag diag, ViennaCLInt n, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem x, ViennaCLInt offx, ViennaCLInt incx); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDtrsv(ViennaCLBackend 
backend, ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLDiag diag, ViennaCLInt n, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem x, ViennaCLInt offx, ViennaCLInt incx); #endif VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostStrsv(ViennaCLBackend backend, ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLDiag diag, ViennaCLInt n, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, float *x, ViennaCLInt offx, ViennaCLInt incx); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDtrsv(ViennaCLBackend backend, ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLDiag diag, ViennaCLInt n, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, double *x, ViennaCLInt offx, ViennaCLInt incx); // xGER: A <- alpha * x * y + A VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLger(ViennaCLHostScalar alpha, ViennaCLVector x, ViennaCLVector y, ViennaCLMatrix A); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASger(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLInt m, ViennaCLInt n, float alpha, float *x, ViennaCLInt offx, ViennaCLInt incx, float *y, ViennaCLInt offy, ViennaCLInt incy, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADger(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLInt m, ViennaCLInt n, double alpha, double *x, ViennaCLInt offx, ViennaCLInt incx, double *y, ViennaCLInt offy, ViennaCLInt incy, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda); #ifdef VIENNACL_WITH_OPENCL VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSger(ViennaCLBackend backend, ViennaCLOrder order, 
ViennaCLInt m, ViennaCLInt n, float alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDger(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLInt m, ViennaCLInt n, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy, double alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda); #endif VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSger(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLInt m, ViennaCLInt n, float alpha, float *x, ViennaCLInt offx, ViennaCLInt incx, float *y, ViennaCLInt offy, ViennaCLInt incy, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDger(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLInt m, ViennaCLInt n, double alpha, double *x, ViennaCLInt offx, ViennaCLInt incx, double *y, ViennaCLInt offy, ViennaCLInt incy, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda); /******************** BLAS Level 3 ***********************/ // xGEMM: C <- alpha * AB + beta * C VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLgemm(ViennaCLHostScalar alpha, ViennaCLMatrix A, ViennaCLMatrix B, ViennaCLHostScalar beta, ViennaCLMatrix C); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, float alpha, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, float *B, ViennaCLInt offB_row, 
ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, float beta, float *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, double alpha, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, double *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, double beta, double *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc); #ifdef VIENNACL_WITH_OPENCL VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, float alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, float beta, cl_mem C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, double alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, double beta, cl_mem 
C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc); #endif VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, float alpha, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, float *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, float beta, float *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc); VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, double alpha, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, double *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, double beta, double *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc); #ifdef __cplusplus } #endif #endif ViennaCL-1.5.1-src/libviennacl/CMakeLists.txt000644 001750 001750 00000003673 12267307531 021025 0ustar00rupprupp000000 000000
# Builds the shared library target `viennacl` (the C interface to ViennaCL).
#
# The set of translation units and the preprocessor defines depend on the two
# switches ENABLE_CUDA and ENABLE_OPENCL:
#   - with CUDA, the .cu variants are compiled through cuda_add_library();
#   - without CUDA, the .cpp variants are compiled through add_library();
#   - the *_opencl units and the link against ${OPENCL_LIBRARIES} are only
#     added when ENABLE_OPENCL is set.
#
# target_link_libraries() intentionally uses the keyword-less signature: it is
# the form applied to the same target name in the CUDA configurations.
include_directories(${PROJECT_SOURCE_DIR}/libviennacl/include/)

if(ENABLE_CUDA AND ENABLE_OPENCL)
  # CUDA + OpenCL + host.
  # The NVCC flags have to be extended before the target is created.
  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-DVIENNACL_WITH_OPENCL")
  cuda_add_library(viennacl SHARED
    src/backend.cu
    src/blas1.cu
    src/blas1_host.cu
    src/blas1_cuda.cu
    src/blas1_opencl.cu
    src/blas2.cu
    src/blas2_host.cu
    src/blas2_cuda.cu
    src/blas2_opencl.cu
    src/blas3.cu
    src/blas3_host.cu
    src/blas3_cuda.cu
    src/blas3_opencl.cu)
  set_target_properties(viennacl PROPERTIES
    COMPILE_FLAGS "-DVIENNACL_WITH_OPENCL -DVIENNACL_WITH_CUDA")
  target_link_libraries(viennacl ${OPENCL_LIBRARIES})
elseif(ENABLE_CUDA)
  # CUDA + host.
  cuda_add_library(viennacl SHARED
    src/backend.cu
    src/blas1.cu
    src/blas1_host.cu
    src/blas1_cuda.cu
    src/blas2.cu
    src/blas2_host.cu
    src/blas2_cuda.cu
    src/blas3.cu
    src/blas3_host.cu
    src/blas3_cuda.cu)
  set_target_properties(viennacl PROPERTIES
    COMPILE_FLAGS "-DVIENNACL_WITH_CUDA")
elseif(ENABLE_OPENCL)
  # OpenCL + host.
  add_library(viennacl SHARED
    src/backend.cpp
    src/blas1.cpp
    src/blas1_host.cpp
    src/blas1_opencl.cpp
    src/blas2.cpp
    src/blas2_host.cpp
    src/blas2_opencl.cpp
    src/blas3.cpp
    src/blas3_host.cpp
    src/blas3_opencl.cpp)
  set_target_properties(viennacl PROPERTIES
    COMPILE_FLAGS "-DVIENNACL_WITH_OPENCL")
  target_link_libraries(viennacl ${OPENCL_LIBRARIES})
else()
  # Host only.
  add_library(viennacl SHARED
    src/backend.cpp
    src/blas1.cpp
    src/blas1_host.cpp
    src/blas2.cpp
    src/blas2_host.cpp
    src/blas3.cpp
    src/blas3_host.cpp)
endif()
ViennaCL-1.5.1-src/libviennacl/src/000755 001750 001750 00000000000 12267307531 017043 5ustar00rupprupp000000 000000 ViennaCL-1.5.1-src/libviennacl/src/blas1_host.cpp000644 001750 001750 00000024256 12267307531 021617 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC.
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" //include the generic inner product functions of ViennaCL #include "viennacl/linalg/inner_prod.hpp" //include the generic norm functions of ViennaCL #include "viennacl/linalg/norm_1.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/norm_inf.hpp" // IxAMAX VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostiSamax(ViennaCLBackend /*backend*/, ViennaCLInt n, ViennaCLInt *index, float *x, ViennaCLInt offx, int incx) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); *index = static_cast(viennacl::linalg::index_norm_inf(v1)); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostiDamax(ViennaCLBackend /*backend*/, ViennaCLInt n, ViennaCLInt *index, double *x, ViennaCLInt offx, int incx) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); *index = static_cast(viennacl::linalg::index_norm_inf(v1)); return ViennaCLSuccess; } // xASUM VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSasum(ViennaCLBackend /*backend*/, ViennaCLInt n, float *alpha, float *x, ViennaCLInt offx, int incx) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); *alpha = viennacl::linalg::norm_1(v1); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDasum(ViennaCLBackend /*backend*/, ViennaCLInt n, double *alpha, double *x, ViennaCLInt offx, int incx) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); *alpha = 
viennacl::linalg::norm_1(v1); return ViennaCLSuccess; } // xAXPY VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSaxpy(ViennaCLBackend /*backend*/, ViennaCLInt n, float alpha, float *x, ViennaCLInt offx, int incx, float *y, ViennaCLInt offy, int incy) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, n, offy, incy); v2 += alpha * v1; return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDaxpy(ViennaCLBackend /*backend*/, ViennaCLInt n, double alpha, double *x, ViennaCLInt offx, int incx, double *y, ViennaCLInt offy, int incy) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, n, offy, incy); v2 += alpha * v1; return ViennaCLSuccess; } // xCOPY VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostScopy(ViennaCLBackend /*backend*/, ViennaCLInt n, float *x, ViennaCLInt offx, int incx, float *y, ViennaCLInt offy, int incy) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, n, offy, incy); v2 = v1; return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDcopy(ViennaCLBackend /*backend*/, ViennaCLInt n, double *x, ViennaCLInt offx, int incx, double *y, ViennaCLInt offy, int incy) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, n, offy, incy); v2 = v1; return ViennaCLSuccess; } // xAXPY VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSdot(ViennaCLBackend /*backend*/, ViennaCLInt n, float *alpha, float *x, ViennaCLInt offx, int incx, float *y, ViennaCLInt offy, int incy) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, n, offy, incy); *alpha = viennacl::linalg::inner_prod(v1, v2); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus 
ViennaCLHostDdot(ViennaCLBackend /*backend*/, ViennaCLInt n, double *alpha, double *x, ViennaCLInt offx, int incx, double *y, ViennaCLInt offy, int incy) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, n, offy, incy); *alpha = viennacl::linalg::inner_prod(v1, v2); return ViennaCLSuccess; } // xNRM2 VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSnrm2(ViennaCLBackend /*backend*/, ViennaCLInt n, float *alpha, float *x, ViennaCLInt offx, int incx) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); *alpha = viennacl::linalg::norm_2(v1); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDnrm2(ViennaCLBackend /*backend*/, ViennaCLInt n, double *alpha, double *x, ViennaCLInt offx, int incx) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); *alpha = viennacl::linalg::norm_2(v1); return ViennaCLSuccess; } // xROT VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSrot(ViennaCLBackend /*backend*/, ViennaCLInt n, float *x, ViennaCLInt offx, int incx, float *y, ViennaCLInt offy, int incy, float c, float s) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, n, offy, incy); viennacl::linalg::plane_rotation(v1, v2, c, s); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDrot(ViennaCLBackend /*backend*/, ViennaCLInt n, double *x, ViennaCLInt offx, int incx, double *y, ViennaCLInt offy, int incy, double c, double s) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, n, offy, incy); viennacl::linalg::plane_rotation(v1, v2, c, s); return ViennaCLSuccess; } // xSCAL VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSscal(ViennaCLBackend /*backend*/, ViennaCLInt n, float alpha, float *x, ViennaCLInt offx, int incx) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, 
offx, incx); v1 *= alpha; return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDscal(ViennaCLBackend /*backend*/, ViennaCLInt n, double alpha, double *x, ViennaCLInt offx, int incx) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); v1 *= alpha; return ViennaCLSuccess; } // xSWAP VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSswap(ViennaCLBackend /*backend*/, ViennaCLInt n, float *x, ViennaCLInt offx, int incx, float *y, ViennaCLInt offy, int incy) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, n, offy, incy); viennacl::swap(v1, v2); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDswap(ViennaCLBackend /*backend*/, ViennaCLInt n, double *x, ViennaCLInt offx, int incx, double *y, ViennaCLInt offy, int incy) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, n, offy, incy); viennacl::swap(v1, v2); return ViennaCLSuccess; } ViennaCL-1.5.1-src/libviennacl/src/init_matrix.hpp000644 001750 001750 00000006207 12267307531 022110 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include "viennacl.hpp" #include "viennacl/backend/mem_handle.hpp" static ViennaCLStatus init_cuda_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A) { #ifdef VIENNACL_WITH_CUDA h.switch_active_handle_id(viennacl::CUDA_MEMORY); h.cuda_handle().reset(A->cuda_mem); h.cuda_handle().inc(); if (A->precision == ViennaCLFloat) h.raw_size(A->internal_size1 * A->internal_size2 * sizeof(float)); // not necessary, but still set for conciseness else if (A->precision == ViennaCLDouble) h.raw_size(A->internal_size1 * A->internal_size2 * sizeof(double)); // not necessary, but still set for conciseness else return ViennaCLGenericFailure; return ViennaCLSuccess; #else (void)h; (void)A; return ViennaCLGenericFailure; #endif } static ViennaCLStatus init_opencl_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A) { #ifdef VIENNACL_WITH_OPENCL h.switch_active_handle_id(viennacl::OPENCL_MEMORY); h.opencl_handle() = A->opencl_mem; h.opencl_handle().inc(); if (A->precision == ViennaCLFloat) h.raw_size(A->internal_size1 * A->internal_size2 * sizeof(float)); // not necessary, but still set for conciseness else if (A->precision == ViennaCLDouble) h.raw_size(A->internal_size1 * A->internal_size2 * sizeof(double)); // not necessary, but still set for conciseness else return ViennaCLGenericFailure; return ViennaCLSuccess; #else (void)h; (void)A; return ViennaCLGenericFailure; #endif } static ViennaCLStatus init_host_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A) { h.switch_active_handle_id(viennacl::MAIN_MEMORY); h.ram_handle().reset(A->host_mem); h.ram_handle().inc(); if (A->precision == ViennaCLFloat) h.raw_size(A->internal_size1 * 
A->internal_size2 * sizeof(float)); // not necessary, but still set for conciseness else if (A->precision == ViennaCLDouble) h.raw_size(A->internal_size1 * A->internal_size2 * sizeof(double)); // not necessary, but still set for conciseness else return ViennaCLGenericFailure; return ViennaCLSuccess; } static ViennaCLStatus init_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A) { switch (A->backend->backend_type) { case ViennaCLCUDA: return init_cuda_matrix(h, A); case ViennaCLOpenCL: return init_opencl_matrix(h, A); case ViennaCLHost: return init_host_matrix(h, A); default: return ViennaCLGenericFailure; } } ViennaCL-1.5.1-src/libviennacl/src/blas2_host.cpp000644 001750 001750 00000030620 12267307531 021610 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/prod.hpp" // xGEMV VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSgemv(ViennaCLBackend /*backend*/, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, float alpha, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, int incA_row, int incA_col, ViennaCLInt lda, float *x, ViennaCLInt offx, int incx, float beta, float *y, ViennaCLInt offy, int incy) { if (order == ViennaCLRowMajor) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, m, offA_row, incA_row, m, n, offA_col, incA_col, lda); v2 *= beta; if (transA == ViennaCLTrans) v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha * viennacl::linalg::prod(mat, v1); } else { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, m, offA_row, incA_row, lda, n, offA_col, incA_col, n); v2 *= beta; if (transA == ViennaCLTrans) v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha * viennacl::linalg::prod(mat, v1); } return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus 
ViennaCLHostDgemv(ViennaCLBackend /*backend*/, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, double alpha, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, int incA_row, int incA_col, ViennaCLInt lda, double *x, ViennaCLInt offx, int incx, double beta, double *y, ViennaCLInt offy, int incy) { if (order == ViennaCLRowMajor) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, m, offA_row, incA_row, m, n, offA_col, incA_col, lda); v2 *= beta; if (transA == ViennaCLTrans) v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha * viennacl::linalg::prod(mat, v1); } else { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, m, offA_row, incA_row, lda, n, offA_col, incA_col, n); v2 *= beta; if (transA == ViennaCLTrans) v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha * viennacl::linalg::prod(mat, v1); } return ViennaCLSuccess; } // xTRSV VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostStrsv(ViennaCLBackend /*backend*/, ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt n, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, int incA_row, int incA_col, ViennaCLInt lda, float *x, ViennaCLInt offx, int incx) { if (order == ViennaCLRowMajor) { viennacl::vector_base v(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, n, offA_row, incA_row, n, n, offA_col, incA_col, lda); if (transA == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) 
viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); } } else { viennacl::vector_base v(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, n, offA_row, incA_row, lda, n, offA_col, incA_col, n); if (transA == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); } } return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDtrsv(ViennaCLBackend /*backend*/, ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt n, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, int incA_row, int incA_col, ViennaCLInt lda, double *x, ViennaCLInt offx, int incx) { if (order == ViennaCLRowMajor) { viennacl::vector_base v(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, n, offA_row, incA_row, n, n, offA_col, incA_col, lda); if (transA == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); } } else { viennacl::vector_base v(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, n, offA_row, incA_row, lda, n, offA_col, incA_col, n); if (transA == ViennaCLTrans) { if (uplo == ViennaCLUpper) 
viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); } } return ViennaCLSuccess; } // xGER VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSger(ViennaCLBackend /*backend*/, ViennaCLOrder order, ViennaCLInt m, ViennaCLInt n, float alpha, float *x, ViennaCLInt offx, int incx, float *y, ViennaCLInt offy, int incy, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, int incA_row, int incA_col, ViennaCLInt lda) { if (order == ViennaCLRowMajor) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, m, offA_row, incA_row, m, n, offA_col, incA_col, lda); mat += alpha * viennacl::linalg::outer_prod(v1, v2); } else { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, m, offA_row, incA_row, lda, n, offA_col, incA_col, n); mat += alpha * viennacl::linalg::outer_prod(v1, v2); } return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDger(ViennaCLBackend /*backend*/, ViennaCLOrder order, ViennaCLInt m, ViennaCLInt n, double alpha, double *x, ViennaCLInt offx, int incx, double *y, ViennaCLInt offy, int incy, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, int incA_row, int incA_col, ViennaCLInt lda) { if (order == ViennaCLRowMajor) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, m, offA_row, incA_row, m, n, offA_col, incA_col, lda); 
mat += alpha * viennacl::linalg::outer_prod(v1, v2); } else { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, m, offA_row, incA_row, lda, n, offA_col, incA_col, n); mat += alpha * viennacl::linalg::outer_prod(v1, v2); } return ViennaCLSuccess; } ViennaCL-1.5.1-src/libviennacl/src/blas1.cu000644 001750 001750 00000024042 12267307531 020400 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" #include "init_vector.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" //include the generic inner product functions of ViennaCL #include "viennacl/linalg/inner_prod.hpp" //include the generic norm functions of ViennaCL #include "viennacl/linalg/norm_1.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/norm_inf.hpp" // IxAMAX VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLiamax(ViennaCLInt *index, ViennaCLVector x) { viennacl::backend::mem_handle v1_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; switch (x->precision) { case ViennaCLFloat: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); *index = 
static_cast(viennacl::linalg::index_norm_inf(v1)); return ViennaCLSuccess; } case ViennaCLDouble: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); *index = static_cast(viennacl::linalg::index_norm_inf(v1)); return ViennaCLSuccess; } default: return ViennaCLGenericFailure; } } // xASUM VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLasum(ViennaCLHostScalar *alpha, ViennaCLVector x) { if ((*alpha)->precision != x->precision) return ViennaCLGenericFailure; viennacl::backend::mem_handle v1_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; switch (x->precision) { case ViennaCLFloat: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); (*alpha)->value_float = viennacl::linalg::norm_1(v1); return ViennaCLSuccess; } case ViennaCLDouble: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); (*alpha)->value_double = viennacl::linalg::norm_1(v1); return ViennaCLSuccess; } default: return ViennaCLGenericFailure; } } // xAXPY VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLaxpy(ViennaCLHostScalar alpha, ViennaCLVector x, ViennaCLVector y) { if (alpha->precision != x->precision) return ViennaCLGenericFailure; if (x->precision != y->precision) return ViennaCLGenericFailure; viennacl::backend::mem_handle v1_handle; viennacl::backend::mem_handle v2_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; if (init_vector(v2_handle, y) != ViennaCLSuccess) return ViennaCLGenericFailure; switch (x->precision) { case ViennaCLFloat: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc); v2 += alpha->value_float * v1; return ViennaCLSuccess; } case ViennaCLDouble: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc); v2 += alpha->value_double * v1; return ViennaCLSuccess; } default: return ViennaCLGenericFailure; } } 
// xCOPY VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLcopy(ViennaCLVector x, ViennaCLVector y) { if (x->precision != y->precision) return ViennaCLGenericFailure; viennacl::backend::mem_handle v1_handle; viennacl::backend::mem_handle v2_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; if (init_vector(v2_handle, y) != ViennaCLSuccess) return ViennaCLGenericFailure; switch (x->precision) { case ViennaCLFloat: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc); v2 = v1; return ViennaCLSuccess; } case ViennaCLDouble: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc); v2 = v1; return ViennaCLSuccess; } default: return ViennaCLGenericFailure; } } // xDOT VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLdot(ViennaCLHostScalar *alpha, ViennaCLVector x, ViennaCLVector y) { if ((*alpha)->precision != x->precision) return ViennaCLGenericFailure; if (x->precision != y->precision) return ViennaCLGenericFailure; viennacl::backend::mem_handle v1_handle; viennacl::backend::mem_handle v2_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; if (init_vector(v2_handle, y) != ViennaCLSuccess) return ViennaCLGenericFailure; switch (x->precision) { case ViennaCLFloat: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc); (*alpha)->value_float = viennacl::linalg::inner_prod(v1, v2); return ViennaCLSuccess; } case ViennaCLDouble: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc); (*alpha)->value_double = viennacl::linalg::inner_prod(v1, v2); return ViennaCLSuccess; } default: return ViennaCLGenericFailure; } } // xNRM2 VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLnrm2(ViennaCLHostScalar 
*alpha, ViennaCLVector x) { if ((*alpha)->precision != x->precision) return ViennaCLGenericFailure; viennacl::backend::mem_handle v1_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; switch (x->precision) { case ViennaCLFloat: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); (*alpha)->value_float = viennacl::linalg::norm_2(v1); return ViennaCLSuccess; } case ViennaCLDouble: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); (*alpha)->value_double = viennacl::linalg::norm_2(v1); return ViennaCLSuccess; } default: return ViennaCLGenericFailure; } } // xROT VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLrot(ViennaCLVector x, ViennaCLVector y, ViennaCLHostScalar c, ViennaCLHostScalar s) { if (c->precision != x->precision) return ViennaCLGenericFailure; if (s->precision != x->precision) return ViennaCLGenericFailure; if (x->precision != y->precision) return ViennaCLGenericFailure; viennacl::backend::mem_handle v1_handle; viennacl::backend::mem_handle v2_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; if (init_vector(v2_handle, y) != ViennaCLSuccess) return ViennaCLGenericFailure; switch (x->precision) { case ViennaCLFloat: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc); viennacl::linalg::plane_rotation(v1, v2, c->value_float, s->value_float); return ViennaCLSuccess; } case ViennaCLDouble: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc); viennacl::linalg::plane_rotation(v1, v2, c->value_double, s->value_double); return ViennaCLSuccess; } default: return ViennaCLGenericFailure; } } // xSCAL VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLscal(ViennaCLHostScalar alpha, ViennaCLVector x) { if (alpha->precision != x->precision) return ViennaCLGenericFailure; viennacl::backend::mem_handle 
v1_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; switch (x->precision) { case ViennaCLFloat: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); v1 *= alpha->value_float; return ViennaCLSuccess; } case ViennaCLDouble: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); v1 *= alpha->value_double; return ViennaCLSuccess; } default: return ViennaCLGenericFailure; } } // xSWAP VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLswap(ViennaCLVector x, ViennaCLVector y) { if (x->precision != y->precision) return ViennaCLGenericFailure; viennacl::backend::mem_handle v1_handle; viennacl::backend::mem_handle v2_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; if (init_vector(v2_handle, y) != ViennaCLSuccess) return ViennaCLGenericFailure; switch (x->precision) { case ViennaCLFloat: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc); viennacl::swap(v1, v2); return ViennaCLSuccess; } case ViennaCLDouble: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc); viennacl::swap(v1, v2); return ViennaCLSuccess; } default: return ViennaCLGenericFailure; } } ViennaCL-1.5.1-src/libviennacl/src/viennacl_private.hpp000644 001750 001750 00000005373 12267307531 023115 0ustar00rupprupp000000 000000 #ifndef VIENNACL_VIENNACL_PRIVATE_HPP #define VIENNACL_VIENNACL_PRIVATE_HPP /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include #ifdef VIENNACL_WITH_OPENCL #ifdef __APPLE__ #include #else #include #endif #endif #include "viennacl.hpp" /************* Backend Management ******************/ struct ViennaCLCUDABackend_impl { //TODO: Add stream and/or device descriptors here }; struct ViennaCLOpenCLBackend_impl { ViennaCLInt context_id; }; struct ViennaCLHostBackend_impl { // Nothing to specify *at the moment* }; /** @brief Generic backend for CUDA, OpenCL, host-based stuff */ struct ViennaCLBackend_impl { ViennaCLBackendTypes backend_type; ViennaCLCUDABackend_impl cuda_backend; ViennaCLOpenCLBackend_impl opencl_backend; ViennaCLHostBackend_impl host_backend; }; /******** User Types **********/ struct ViennaCLHostScalar_impl { ViennaCLPrecision precision; union { float value_float; double value_double; }; }; struct ViennaCLScalar_impl { ViennaCLBackend backend; ViennaCLPrecision precision; // buffer: #ifdef VIENNACL_WITH_CUDA char * cuda_mem; #endif #ifdef VIENNACL_WITH_OPENCL cl_mem opencl_mem; #endif char * host_mem; ViennaCLInt offset; }; struct ViennaCLVector_impl { ViennaCLBackend backend; ViennaCLPrecision precision; // buffer: #ifdef VIENNACL_WITH_CUDA char * cuda_mem; #endif #ifdef VIENNACL_WITH_OPENCL cl_mem opencl_mem; #endif char * host_mem; ViennaCLInt offset; ViennaCLInt inc; ViennaCLInt size; }; struct ViennaCLMatrix_impl { ViennaCLBackend backend; ViennaCLPrecision precision; ViennaCLOrder order; ViennaCLTranspose trans; // buffer: #ifdef VIENNACL_WITH_CUDA char * cuda_mem; #endif #ifdef VIENNACL_WITH_OPENCL cl_mem opencl_mem; #endif char * host_mem; ViennaCLInt size1; ViennaCLInt start1; ViennaCLInt stride1; ViennaCLInt internal_size1; ViennaCLInt 
size2; ViennaCLInt start2; ViennaCLInt stride2; ViennaCLInt internal_size2; }; #endif ViennaCL-1.5.1-src/libviennacl/src/blas2_host.cu000644 001750 001750 00000030620 12267307531 021435 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/prod.hpp" // xGEMV VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSgemv(ViennaCLBackend /*backend*/, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, float alpha, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, int incA_row, int incA_col, ViennaCLInt lda, float *x, ViennaCLInt offx, int incx, float beta, float *y, ViennaCLInt offy, int incy) { if (order == ViennaCLRowMajor) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, m, offA_row, incA_row, m, n, offA_col, incA_col, lda); v2 *= beta; if (transA == ViennaCLTrans) v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha * viennacl::linalg::prod(mat, v1); } else { viennacl::vector_base 
v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, m, offA_row, incA_row, lda, n, offA_col, incA_col, n); v2 *= beta; if (transA == ViennaCLTrans) v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha * viennacl::linalg::prod(mat, v1); } return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDgemv(ViennaCLBackend /*backend*/, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, double alpha, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, int incA_row, int incA_col, ViennaCLInt lda, double *x, ViennaCLInt offx, int incx, double beta, double *y, ViennaCLInt offy, int incy) { if (order == ViennaCLRowMajor) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, m, offA_row, incA_row, m, n, offA_col, incA_col, lda); v2 *= beta; if (transA == ViennaCLTrans) v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha * viennacl::linalg::prod(mat, v1); } else { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, m, offA_row, incA_row, lda, n, offA_col, incA_col, n); v2 *= beta; if (transA == ViennaCLTrans) v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha * viennacl::linalg::prod(mat, v1); } return ViennaCLSuccess; } // xTRSV VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostStrsv(ViennaCLBackend /*backend*/, ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt n, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, int incA_row, int incA_col, ViennaCLInt lda, float *x, ViennaCLInt offx, int incx) { if (order == ViennaCLRowMajor) { 
viennacl::vector_base v(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, n, offA_row, incA_row, n, n, offA_col, incA_col, lda); if (transA == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); } } else { viennacl::vector_base v(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, n, offA_row, incA_row, lda, n, offA_col, incA_col, n); if (transA == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); } } return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDtrsv(ViennaCLBackend /*backend*/, ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt n, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, int incA_row, int incA_col, ViennaCLInt lda, double *x, ViennaCLInt offx, int incx) { if (order == ViennaCLRowMajor) { viennacl::vector_base v(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, n, offA_row, incA_row, n, n, offA_col, incA_col, lda); if (transA == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); } 
else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); } } else { viennacl::vector_base v(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, n, offA_row, incA_row, lda, n, offA_col, incA_col, n); if (transA == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); } } return ViennaCLSuccess; } // xGER VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSger(ViennaCLBackend /*backend*/, ViennaCLOrder order, ViennaCLInt m, ViennaCLInt n, float alpha, float *x, ViennaCLInt offx, int incx, float *y, ViennaCLInt offy, int incy, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, int incA_row, int incA_col, ViennaCLInt lda) { if (order == ViennaCLRowMajor) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, m, offA_row, incA_row, m, n, offA_col, incA_col, lda); mat += alpha * viennacl::linalg::outer_prod(v1, v2); } else { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, m, offA_row, incA_row, lda, n, offA_col, incA_col, n); mat += alpha * viennacl::linalg::outer_prod(v1, v2); } return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDger(ViennaCLBackend /*backend*/, ViennaCLOrder order, ViennaCLInt m, ViennaCLInt n, double alpha, double *x, ViennaCLInt offx, 
int incx, double *y, ViennaCLInt offy, int incy, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, int incA_row, int incA_col, ViennaCLInt lda) { if (order == ViennaCLRowMajor) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, m, offA_row, incA_row, m, n, offA_col, incA_col, lda); mat += alpha * viennacl::linalg::outer_prod(v1, v2); } else { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::MAIN_MEMORY, m, offA_row, incA_row, lda, n, offA_col, incA_col, n); mat += alpha * viennacl::linalg::outer_prod(v1, v2); } return ViennaCLSuccess; } ViennaCL-1.5.1-src/libviennacl/src/blas3.cu000644 001750 001750 00000175613 12267307531 020415 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" #include "init_matrix.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/prod.hpp" // GEMV VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLgemm(ViennaCLHostScalar alpha, ViennaCLMatrix A, ViennaCLMatrix B, ViennaCLHostScalar beta, ViennaCLMatrix C) { viennacl::backend::mem_handle A_handle; viennacl::backend::mem_handle B_handle; viennacl::backend::mem_handle C_handle; if (init_matrix(A_handle, A) != ViennaCLSuccess) return ViennaCLGenericFailure; if (init_matrix(B_handle, B) != ViennaCLSuccess) return ViennaCLGenericFailure; if (init_matrix(C_handle, C) != ViennaCLSuccess) return ViennaCLGenericFailure; switch (A->precision) { case ViennaCLFloat: { if (A->order == ViennaCLRowMajor && B->order == ViennaCLRowMajor && C->order == ViennaCLRowMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == 
ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_float, beta->value_float); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLRowMajor && B->order == ViennaCLRowMajor && C->order == ViennaCLColumnMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_float, beta->value_float); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLRowMajor && B->order == ViennaCLColumnMajor && C->order == ViennaCLRowMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, 
A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_float, beta->value_float); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLRowMajor && B->order == ViennaCLColumnMajor && C->order == ViennaCLColumnMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans 
== ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_float, beta->value_float); else return ViennaCLGenericFailure; } if (A->order == ViennaCLColumnMajor && B->order == ViennaCLRowMajor && C->order == ViennaCLRowMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_float, beta->value_float); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLColumnMajor && B->order == ViennaCLRowMajor && C->order == ViennaCLColumnMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base 
mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_float, beta->value_float); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLColumnMajor && B->order == ViennaCLColumnMajor && C->order == ViennaCLRowMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) 
viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_float, beta->value_float); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLColumnMajor && B->order == ViennaCLColumnMajor && C->order == ViennaCLColumnMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_float, beta->value_float); else return ViennaCLGenericFailure; } else return ViennaCLGenericFailure; return ViennaCLSuccess; } case ViennaCLDouble: { if (A->order == ViennaCLRowMajor && B->order == ViennaCLRowMajor && C->order == ViennaCLRowMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, 
C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_double, beta->value_double); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLRowMajor && B->order == ViennaCLRowMajor && C->order == ViennaCLColumnMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_double, beta->value_double); else return 
ViennaCLGenericFailure; } else if (A->order == ViennaCLRowMajor && B->order == ViennaCLColumnMajor && C->order == ViennaCLRowMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_double, beta->value_double); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLRowMajor && B->order == ViennaCLColumnMajor && C->order == ViennaCLColumnMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_double, 
beta->value_double); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_double, beta->value_double); else return ViennaCLGenericFailure; } if (A->order == ViennaCLColumnMajor && B->order == ViennaCLRowMajor && C->order == ViennaCLRowMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_double, beta->value_double); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLColumnMajor && B->order == ViennaCLRowMajor && C->order == ViennaCLColumnMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, 
A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_double, beta->value_double); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLColumnMajor && B->order == ViennaCLColumnMajor && C->order == ViennaCLRowMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_double, 
beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_double, beta->value_double); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLColumnMajor && B->order == ViennaCLColumnMajor && C->order == ViennaCLColumnMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_double, beta->value_double); else return ViennaCLGenericFailure; } else return ViennaCLGenericFailure; return ViennaCLSuccess; } default: return ViennaCLGenericFailure; } }

// xTRSM

namespace
{
  /** @brief Calls viennacl::linalg::inplace_solve() for one solver tag, wrapping each operand in viennacl::trans() only if its flag is set.
  *
  * @param mat_A          Matrix passed as the first argument of inplace_solve()
  * @param A_transposed   If true, viennacl::trans(mat_A) is passed instead of mat_A
  * @param mat_B          Matrix passed as the second argument of inplace_solve()
  * @param B_transposed   If true, viennacl::trans(mat_B) is passed instead of mat_B
  * @param solver_tag     Tag object forwarded unchanged as the third argument of inplace_solve()
  */
  template <typename NumericT, typename LayoutA, typename LayoutB, typename SolverTagT>
  void trsm_solve(viennacl::matrix_base<NumericT, LayoutA> & mat_A, bool A_transposed,
                  viennacl::matrix_base<NumericT, LayoutB> & mat_B, bool B_transposed,
                  SolverTagT solver_tag)
  {
    if (A_transposed && B_transposed)
      viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), solver_tag);
    else if (A_transposed)
      viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, solver_tag);
    else if (B_transposed)
      // A must stay untransposed here; the previous code passed viennacl::trans(mat_A) in this case.
      viennacl::linalg::inplace_solve(mat_A, viennacl::trans(mat_B), solver_tag);
    else
      viennacl::linalg::inplace_solve(mat_A, mat_B, solver_tag);
  }

  /** @brief Wraps both handles in matrix_base objects of fixed scalar type and layouts, then selects the solver tag from (uplo, diag).
  *
  * @return ViennaCLSuccess after the solve was issued; ViennaCLGenericFailure if a trans flag is neither
  *         ViennaCLTrans nor ViennaCLNoTrans, or if (uplo, diag) matches none of the four handled combinations.
  */
  template <typename NumericT, typename LayoutA, typename LayoutB>
  ViennaCLStatus trsm_with_layout(viennacl::backend::mem_handle & A_handle, ViennaCLMatrix A,
                                  viennacl::backend::mem_handle & B_handle, ViennaCLMatrix B,
                                  ViennaCLUplo uplo, ViennaCLDiag diag)
  {
    const bool A_transposed = (A->trans == ViennaCLTrans);
    const bool B_transposed = (B->trans == ViennaCLTrans);

    // Reject unrecognised transpose flags explicitly. Previously such values skipped every branch
    // and the function reported ViennaCLSuccess without having solved anything.
    if ( (!A_transposed && A->trans != ViennaCLNoTrans) || (!B_transposed && B->trans != ViennaCLNoTrans) )
      return ViennaCLGenericFailure;

    // NOTE(review): the matrix_base template arguments were absent in the reviewed text; they are
    // restored here from the precision/order checks that guard each call -- confirm against the
    // matrix_base declaration used by this release.
    viennacl::matrix_base<NumericT, LayoutA> mat_A(A_handle,
                                                   A->size1, A->start1, A->stride1, A->internal_size1,
                                                   A->size2, A->start2, A->stride2, A->internal_size2);
    viennacl::matrix_base<NumericT, LayoutB> mat_B(B_handle,
                                                   B->size1, B->start1, B->stride1, B->internal_size1,
                                                   B->size2, B->start2, B->stride2, B->internal_size2);

    if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit)
      trsm_solve(mat_A, A_transposed, mat_B, B_transposed, viennacl::linalg::upper_tag());
    else if (uplo == ViennaCLUpper && diag == ViennaCLUnit)
      trsm_solve(mat_A, A_transposed, mat_B, B_transposed, viennacl::linalg::unit_upper_tag());
    else if (uplo == ViennaCLLower && diag == ViennaCLNonUnit)
      trsm_solve(mat_A, A_transposed, mat_B, B_transposed, viennacl::linalg::lower_tag());
    else if (uplo == ViennaCLLower && diag == ViennaCLUnit)
      trsm_solve(mat_A, A_transposed, mat_B, B_transposed, viennacl::linalg::unit_lower_tag());
    else
      return ViennaCLGenericFailure;

    return ViennaCLSuccess;
  }

  /** @brief Maps the runtime storage orders of A and B onto the matching layout template arguments.
  *
  * @return Result of trsm_with_layout(), or ViennaCLGenericFailure if either order is neither
  *         ViennaCLRowMajor nor ViennaCLColumnMajor.
  */
  template <typename NumericT>
  ViennaCLStatus trsm_with_precision(viennacl::backend::mem_handle & A_handle, ViennaCLMatrix A,
                                     viennacl::backend::mem_handle & B_handle, ViennaCLMatrix B,
                                     ViennaCLUplo uplo, ViennaCLDiag diag)
  {
    if (A->order == ViennaCLRowMajor && B->order == ViennaCLRowMajor)
      return trsm_with_layout<NumericT, viennacl::row_major, viennacl::row_major>(A_handle, A, B_handle, B, uplo, diag);
    if (A->order == ViennaCLRowMajor && B->order == ViennaCLColumnMajor)
      return trsm_with_layout<NumericT, viennacl::row_major, viennacl::column_major>(A_handle, A, B_handle, B, uplo, diag);
    if (A->order == ViennaCLColumnMajor && B->order == ViennaCLRowMajor)
      return trsm_with_layout<NumericT, viennacl::column_major, viennacl::row_major>(A_handle, A, B_handle, B, uplo, diag);
    if (A->order == ViennaCLColumnMajor && B->order == ViennaCLColumnMajor)
      return trsm_with_layout<NumericT, viennacl::column_major, viennacl::column_major>(A_handle, A, B_handle, B, uplo, diag);

    // Unknown storage order: report failure rather than returning success without solving.
    return ViennaCLGenericFailure;
  }
}

/** @brief In-place triangular solve with a matrix right-hand side, dispatched on precision, storage order, transposition, and triangle type.
*
* Both matrices are turned into memory handles via init_matrix() and handed to
* viennacl::linalg::inplace_solve(), which receives B as its second (in-place) argument.
*
* @param A      Matrix passed as the first argument of inplace_solve(); A->precision selects float or double,
*               A->order the layout, A->trans whether viennacl::trans() is applied.
* @param uplo   ViennaCLUpper or ViennaCLLower; together with diag it selects the solver tag.
* @param diag   ViennaCLUnit or ViennaCLNonUnit; selects the unit_* or plain solver tag.
* @param B      Matrix passed as the second argument of inplace_solve(); B->order and B->trans are honoured like those of A.
*
* @return ViennaCLSuccess once the solve has been issued. ViennaCLGenericFailure if init_matrix() fails for either
*         matrix, if A->precision is neither ViennaCLFloat nor ViennaCLDouble, or if an order, trans, uplo or diag
*         value is not one of the handled enumerators.
*
* NOTE(review): only A->precision is inspected; B is assumed to hold the same scalar type -- confirm with callers.
*/
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLtrsm(ViennaCLMatrix A, ViennaCLUplo uplo, ViennaCLDiag diag, ViennaCLMatrix B)
{
  viennacl::backend::mem_handle A_handle;
  viennacl::backend::mem_handle B_handle;

  if (init_matrix(A_handle, A) != ViennaCLSuccess)
    return ViennaCLGenericFailure;

  if (init_matrix(B_handle, B) != ViennaCLSuccess)
    return ViennaCLGenericFailure;

  switch (A->precision)
  {
    case ViennaCLFloat:
      return trsm_with_precision<float>(A_handle, A, B_handle, B, uplo, diag);

    case ViennaCLDouble:
      return trsm_with_precision<double>(A_handle, A, B_handle, B, uplo, diag);

    default:
      return ViennaCLGenericFailure;
  }
}
ViennaCL-1.5.1-src/libviennacl/src/blas1.cpp000644 001750 001750 00000024042 12267307531 020553 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
// IxAMAX
//
// Stores in *index the value returned by viennacl::linalg::index_norm_inf() for the vector x.
// (Presumably the position of the entry with the largest magnitude, as in BLAS IxAMAX; whether
// the index is 0- or 1-based is not visible here -- TODO confirm.)
//
// Parameters:
//   index  output; written only on the ViennaCLFloat / ViennaCLDouble paths
//   x      vector descriptor; its precision, size, offset and inc fields are read
//
// Returns ViennaCLSuccess after the index has been written, ViennaCLGenericFailure if
// init_vector() does not report success or x->precision is neither ViennaCLFloat nor ViennaCLDouble.
//
// NOTE(review): in this copy of the file the template argument of viennacl::vector_base and the
// target type of static_cast are missing (angle-bracket text appears to have been stripped).
// Presumably they are <float>/<double> and <ViennaCLInt> -- confirm against the upstream source.
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLiamax(ViennaCLInt *index, ViennaCLVector x)
{
  viennacl::backend::mem_handle v1_handle;

  // init_vector() is defined elsewhere; presumably it points v1_handle at x's buffer.
  if (init_vector(v1_handle, x) != ViennaCLSuccess)
    return ViennaCLGenericFailure;

  // Dispatch on the element precision recorded in the descriptor.
  switch (x->precision)
  {
    case ViennaCLFloat:
    {
      viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc);

      *index = static_cast(viennacl::linalg::index_norm_inf(v1));
      return ViennaCLSuccess;
    }

    case ViennaCLDouble:
    {
      viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc);

      *index = static_cast(viennacl::linalg::index_norm_inf(v1));
      return ViennaCLSuccess;
    }

    default:
      return ViennaCLGenericFailure;
  }
}
// xAXPY
//
// Updates y in place: y += alpha * x.
//
// Parameters:
//   alpha  host scalar; value_float or value_double is read according to x->precision
//   x      input vector descriptor
//   y      vector descriptor that receives the update
//
// Returns ViennaCLSuccess after the update has been issued, ViennaCLGenericFailure if the
// precisions of alpha, x and y are not all equal, if init_vector() does not report success
// for either vector, or if the precision is neither ViennaCLFloat nor ViennaCLDouble.
//
// NOTE(review): the template argument of viennacl::vector_base is missing in this copy of the
// file (angle-bracket text appears stripped); presumably <float>/<double> -- confirm upstream.
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLaxpy(ViennaCLHostScalar alpha, ViennaCLVector x, ViennaCLVector y)
{
  // Reject mixed-precision operands before touching any memory.
  if (alpha->precision != x->precision)
    return ViennaCLGenericFailure;

  if (x->precision != y->precision)
    return ViennaCLGenericFailure;

  viennacl::backend::mem_handle v1_handle;
  viennacl::backend::mem_handle v2_handle;

  // init_vector() is defined elsewhere; presumably it points each handle at the vector's buffer.
  if (init_vector(v1_handle, x) != ViennaCLSuccess)
    return ViennaCLGenericFailure;

  if (init_vector(v2_handle, y) != ViennaCLSuccess)
    return ViennaCLGenericFailure;

  switch (x->precision)
  {
    case ViennaCLFloat:
    {
      viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc);
      viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc);

      v2 += alpha->value_float * v1;
      return ViennaCLSuccess;
    }

    case ViennaCLDouble:
    {
      viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc);
      viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc);

      v2 += alpha->value_double * v1;
      return ViennaCLSuccess;
    }

    default:
      return ViennaCLGenericFailure;
  }
}
// xDOT
//
// Stores the result of viennacl::linalg::inner_prod(x, y) in the host scalar *alpha.
//
// Parameters:
//   alpha  pointer to a host scalar; its precision must already match x, and its value_float
//          or value_double member is overwritten with the result
//   x, y   vector descriptors of the two operands
//
// Returns ViennaCLSuccess after the result has been stored, ViennaCLGenericFailure if the
// precisions of *alpha, x and y are not all equal, if init_vector() does not report success
// for either vector, or if the precision is neither ViennaCLFloat nor ViennaCLDouble.
//
// NOTE(review): the template argument of viennacl::vector_base is missing in this copy of the
// file (angle-bracket text appears stripped); presumably <float>/<double> -- confirm upstream.
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLdot(ViennaCLHostScalar *alpha, ViennaCLVector x, ViennaCLVector y)
{
  // Reject mixed-precision operands before touching any memory.
  if ((*alpha)->precision != x->precision)
    return ViennaCLGenericFailure;

  if (x->precision != y->precision)
    return ViennaCLGenericFailure;

  viennacl::backend::mem_handle v1_handle;
  viennacl::backend::mem_handle v2_handle;

  // init_vector() is defined elsewhere; presumably it points each handle at the vector's buffer.
  if (init_vector(v1_handle, x) != ViennaCLSuccess)
    return ViennaCLGenericFailure;

  if (init_vector(v2_handle, y) != ViennaCLSuccess)
    return ViennaCLGenericFailure;

  switch (x->precision)
  {
    case ViennaCLFloat:
    {
      viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc);
      viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc);

      (*alpha)->value_float = viennacl::linalg::inner_prod(v1, v2);
      return ViennaCLSuccess;
    }

    case ViennaCLDouble:
    {
      viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc);
      viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc);

      (*alpha)->value_double = viennacl::linalg::inner_prod(v1, v2);
      return ViennaCLSuccess;
    }

    default:
      return ViennaCLGenericFailure;
  }
}
// xSCAL
//
// Scales x in place: x *= alpha.
//
// Parameters:
//   alpha  host scalar; value_float or value_double is read according to x->precision
//   x      vector descriptor that is scaled
//
// Returns ViennaCLSuccess after the scaling has been issued, ViennaCLGenericFailure if alpha
// and x differ in precision, if init_vector() does not report success, or if the precision is
// neither ViennaCLFloat nor ViennaCLDouble.
//
// NOTE(review): the template argument of viennacl::vector_base is missing in this copy of the
// file (angle-bracket text appears stripped); presumably <float>/<double> -- confirm upstream.
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLscal(ViennaCLHostScalar alpha, ViennaCLVector x)
{
  // Reject a scalar whose precision differs from the vector's.
  if (alpha->precision != x->precision)
    return ViennaCLGenericFailure;

  viennacl::backend::mem_handle v1_handle;

  // init_vector() is defined elsewhere; presumably it points v1_handle at x's buffer.
  if (init_vector(v1_handle, x) != ViennaCLSuccess)
    return ViennaCLGenericFailure;

  switch (x->precision)
  {
    case ViennaCLFloat:
    {
      viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc);

      v1 *= alpha->value_float;
      return ViennaCLSuccess;
    }

    case ViennaCLDouble:
    {
      viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc);

      v1 *= alpha->value_double;
      return ViennaCLSuccess;
    }

    default:
      return ViennaCLGenericFailure;
  }
}
ViennaCLSuccess) return ViennaCLGenericFailure; switch (x->precision) { case ViennaCLFloat: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc); viennacl::swap(v1, v2); return ViennaCLSuccess; } case ViennaCLDouble: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc); viennacl::swap(v1, v2); return ViennaCLSuccess; } default: return ViennaCLGenericFailure; } } ViennaCL-1.5.1-src/libviennacl/src/blas1_opencl.cu000644 001750 001750 00000027120 12267307531 021740 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" //include the generic inner product functions of ViennaCL #include "viennacl/linalg/inner_prod.hpp" //include the generic norm functions of ViennaCL #include "viennacl/linalg/norm_1.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/norm_inf.hpp" #ifdef VIENNACL_WITH_OPENCL // IxAMAX VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLiSamax(ViennaCLBackend backend, ViennaCLInt n, ViennaCLInt *index, cl_mem x, ViennaCLInt offx, ViennaCLInt incx) { viennacl::vector_base v1(x, n, offx, incx, 
// IDAMAX for a raw OpenCL buffer.
//
// Wraps the cl_mem buffer x in a ViennaCL vector view built from n, offx and incx (presumably
// element count, start offset and increment -- TODO confirm against the vector_base constructor)
// and writes the value returned by viennacl::linalg::index_norm_inf() to *index.
//
// The OpenCL context is looked up from backend->opencl_backend.context_id.
// Always returns ViennaCLSuccess; no argument validation is performed here.
//
// NOTE(review): the template argument of viennacl::vector_base and the target type of
// static_cast are missing in this copy of the file (angle-bracket text appears stripped);
// presumably <double> and <ViennaCLInt> -- confirm against the upstream source.
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLiDamax(ViennaCLBackend backend, ViennaCLInt n,
                                                               ViennaCLInt *index,
                                                               cl_mem x, ViennaCLInt offx, ViennaCLInt incx)
{
  viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id));

  *index = static_cast(viennacl::linalg::index_norm_inf(v1));
  return ViennaCLSuccess;
}
// xCOPY
//
// SCOPY for raw OpenCL buffers: assigns the vector view over x to the vector view over y (y = x).
//
// Both views are built over the caller's cl_mem buffers from n plus the respective
// offset/increment pair (presumably element count, start offset and increment -- TODO confirm
// against the vector_base constructor). The OpenCL context for both is looked up from
// backend->opencl_backend.context_id.
//
// Always returns ViennaCLSuccess; no argument validation is performed here.
//
// NOTE(review): the template argument of viennacl::vector_base is missing in this copy of the
// file (angle-bracket text appears stripped); presumably <float> -- confirm upstream.
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLScopy(ViennaCLBackend backend, ViennaCLInt n,
                                                              cl_mem x, ViennaCLInt offx, ViennaCLInt incx,
                                                              cl_mem y, ViennaCLInt offy, ViennaCLInt incy)
{
  viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id));
  viennacl::vector_base v2(y, n, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id));

  v2 = v1;
  return ViennaCLSuccess;
}
// xNRM2
//
// SNRM2 for a raw OpenCL buffer: writes the result of viennacl::linalg::norm_2() for the vector
// view over x to *alpha.
//
// The view is built over the caller's cl_mem buffer from n, offx and incx (presumably element
// count, start offset and increment -- TODO confirm against the vector_base constructor). The
// OpenCL context is looked up from backend->opencl_backend.context_id.
//
// Always returns ViennaCLSuccess; no argument validation is performed here.
//
// NOTE(review): the template argument of viennacl::vector_base is missing in this copy of the
// file (angle-bracket text appears stripped); presumably <float> -- confirm upstream.
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSnrm2(ViennaCLBackend backend, ViennaCLInt n,
                                                              float *alpha,
                                                              cl_mem x, ViennaCLInt offx, ViennaCLInt incx)
{
  viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id));

  *alpha = viennacl::linalg::norm_2(v1);
  return ViennaCLSuccess;
}
// DSCAL for a raw OpenCL buffer: scales the vector view over x in place (x *= alpha).
//
// The view is built over the caller's cl_mem buffer from n, offx and incx (presumably element
// count, start offset and increment -- TODO confirm against the vector_base constructor). The
// OpenCL context is looked up from backend->opencl_backend.context_id.
//
// Always returns ViennaCLSuccess; no argument validation is performed here.
//
// NOTE(review): the template argument of viennacl::vector_base is missing in this copy of the
// file (angle-bracket text appears stripped); presumably <double> -- confirm upstream.
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDscal(ViennaCLBackend backend, ViennaCLInt n,
                                                              double alpha,
                                                              cl_mem x, ViennaCLInt offx, ViennaCLInt incx)
{
  viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id));

  v1 *= alpha;
  return ViennaCLSuccess;
}
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" #include "init_vector.hpp" #include "init_matrix.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/prod.hpp" // GEMV VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLgemv(ViennaCLHostScalar alpha, ViennaCLMatrix A, ViennaCLVector x, ViennaCLHostScalar beta, ViennaCLVector y) { viennacl::backend::mem_handle v1_handle; viennacl::backend::mem_handle v2_handle; viennacl::backend::mem_handle A_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; if (init_vector(v2_handle, y) != ViennaCLSuccess) return ViennaCLGenericFailure; if (init_matrix(A_handle, A) != ViennaCLSuccess) return ViennaCLGenericFailure; switch (x->precision) { case ViennaCLFloat: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc); if (A->order == ViennaCLRowMajor) { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); v2 *= beta->value_float; if (A->trans == ViennaCLTrans) v2 += alpha->value_float * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha->value_float * viennacl::linalg::prod(mat, v1); } else { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); v2 *= beta->value_float; if (A->trans == 
ViennaCLTrans) v2 += alpha->value_float * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha->value_float * viennacl::linalg::prod(mat, v1); } return ViennaCLSuccess; } case ViennaCLDouble: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc); if (A->order == ViennaCLRowMajor) { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); v2 *= beta->value_double; if (A->trans == ViennaCLTrans) v2 += alpha->value_double * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha->value_double * viennacl::linalg::prod(mat, v1); } else { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); v2 *= beta->value_double; if (A->trans == ViennaCLTrans) v2 += alpha->value_double * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha->value_double * viennacl::linalg::prod(mat, v1); } return ViennaCLSuccess; } default: return ViennaCLGenericFailure; } } // xTRSV VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLtrsv(ViennaCLMatrix A, ViennaCLVector x, ViennaCLUplo uplo) { viennacl::backend::mem_handle v1_handle; viennacl::backend::mem_handle A_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; if (init_matrix(A_handle, A) != ViennaCLSuccess) return ViennaCLGenericFailure; switch (x->precision) { case ViennaCLFloat: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); if (A->order == ViennaCLRowMajor) { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); if (A->trans == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v1, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), 
v1, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v1, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v1, viennacl::linalg::lower_tag()); } } else { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); if (A->trans == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v1, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v1, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v1, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v1, viennacl::linalg::lower_tag()); } } return ViennaCLSuccess; } case ViennaCLDouble: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); if (A->order == ViennaCLRowMajor) { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); if (A->trans == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v1, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v1, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v1, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v1, viennacl::linalg::lower_tag()); } } else { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); if (A->trans == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v1, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v1, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) 
viennacl::linalg::inplace_solve(mat, v1, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v1, viennacl::linalg::lower_tag()); } } return ViennaCLSuccess; } default: return ViennaCLGenericFailure; } } // xGER VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLger(ViennaCLHostScalar alpha, ViennaCLVector x, ViennaCLVector y, ViennaCLMatrix A) { viennacl::backend::mem_handle v1_handle; viennacl::backend::mem_handle v2_handle; viennacl::backend::mem_handle A_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; if (init_vector(v2_handle, y) != ViennaCLSuccess) return ViennaCLGenericFailure; if (init_matrix(A_handle, A) != ViennaCLSuccess) return ViennaCLGenericFailure; switch (x->precision) { case ViennaCLFloat: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc); if (A->order == ViennaCLRowMajor) { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); mat += alpha->value_float * viennacl::linalg::outer_prod(v1, v2); } else { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); mat += alpha->value_float * viennacl::linalg::outer_prod(v1, v2); } return ViennaCLSuccess; } case ViennaCLDouble: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc); if (A->order == ViennaCLRowMajor) { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); mat += alpha->value_double * viennacl::linalg::outer_prod(v1, v2); } else { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); mat += alpha->value_double * 
viennacl::linalg::outer_prod(v1, v2); } return ViennaCLSuccess; } default: return ViennaCLGenericFailure; } } ViennaCL-1.5.1-src/libviennacl/src/blas3_cuda.cu000644 001750 001750 00000033112 12267307531 021374 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" #include "blas3.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/prod.hpp" #ifdef VIENNACL_WITH_CUDA // // xGEMV // namespace detail { template ViennaCLStatus ViennaCLCUDAgemm_impl(ViennaCLBackend /*backend*/, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, NumericT alpha, NumericT *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, NumericT *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, NumericT beta, NumericT *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) { ViennaCLInt A_size1 = (transA == ViennaCLTrans) ? k : m; ViennaCLInt A_size2 = (transA == ViennaCLTrans) ? 
m : k; ViennaCLInt B_size1 = (transB == ViennaCLTrans) ? n : k; ViennaCLInt B_size2 = (transB == ViennaCLTrans) ? k : n; /////// A row-major if (orderA == ViennaCLRowMajor && orderB == ViennaCLRowMajor && orderC == ViennaCLRowMajor) { viennacl::matrix_base matA(A, viennacl::CUDA_MEMORY, A_size1, offA_row, incA_row, m, A_size2, offA_col, incA_col, lda); viennacl::matrix_base matB(B, viennacl::CUDA_MEMORY, B_size1, offB_row, incB_row, k, B_size2, offB_col, incB_col, ldb); viennacl::matrix_base matC(C, viennacl::CUDA_MEMORY, m, offC_row, incC_row, m, n, offC_col, incC_col, ldc); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLRowMajor && orderB == ViennaCLRowMajor && orderC == ViennaCLColumnMajor) { viennacl::matrix_base matA(A, viennacl::CUDA_MEMORY, A_size1, offA_row, incA_row, m, A_size2, offA_col, incA_col, lda); viennacl::matrix_base matB(B, viennacl::CUDA_MEMORY, B_size1, offB_row, incB_row, k, B_size2, offB_col, incB_col, ldb); viennacl::matrix_base matC(C, viennacl::CUDA_MEMORY, m, offC_row, incC_row, ldc, n, offC_col, incC_col, n); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLRowMajor && orderB == ViennaCLColumnMajor && orderC == ViennaCLRowMajor) { viennacl::matrix_base matA(A, viennacl::CUDA_MEMORY, A_size1, offA_row, incA_row, m, A_size2, offA_col, incA_col, lda); viennacl::matrix_base matB(B, viennacl::CUDA_MEMORY, B_size1, offB_row, incB_row, ldb, B_size2, offB_col, incB_col, n); viennacl::matrix_base matC(C, viennacl::CUDA_MEMORY, m, offC_row, incC_row, m, n, offC_col, incC_col, ldc); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLRowMajor && orderB == ViennaCLColumnMajor && orderC == ViennaCLColumnMajor) { viennacl::matrix_base matA(A, viennacl::CUDA_MEMORY, A_size1, offA_row, incA_row, m, A_size2, offA_col, incA_col, lda); viennacl::matrix_base matB(B, viennacl::CUDA_MEMORY, B_size1, offB_row, 
incB_row, ldb, B_size2, offB_col, incB_col, n); viennacl::matrix_base matC(C, viennacl::CUDA_MEMORY, m, offC_row, incC_row, ldc, n, offC_col, incC_col, n); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } /////// A column-major else if (orderA == ViennaCLColumnMajor && orderB == ViennaCLRowMajor && orderC == ViennaCLRowMajor) { viennacl::matrix_base matA(A, viennacl::CUDA_MEMORY, A_size1, offA_row, incA_row, lda, A_size2, offA_col, incA_col, k); viennacl::matrix_base matB(B, viennacl::CUDA_MEMORY, B_size1, offB_row, incB_row, k, B_size2, offB_col, incB_col, ldb); viennacl::matrix_base matC(C, viennacl::CUDA_MEMORY, m, offC_row, incC_row, m, n, offC_col, incC_col, ldc); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLColumnMajor && orderB == ViennaCLRowMajor && orderC == ViennaCLColumnMajor) { viennacl::matrix_base matA(A, viennacl::CUDA_MEMORY, A_size1, offA_row, incA_row, lda, A_size2, offA_col, incA_col, k); viennacl::matrix_base matB(B, viennacl::CUDA_MEMORY, B_size1, offB_row, incB_row, k, B_size2, offB_col, incB_col, ldb); viennacl::matrix_base matC(C, viennacl::CUDA_MEMORY, m, offC_row, incC_row, ldc, n, offC_col, incC_col, n); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLColumnMajor && orderB == ViennaCLColumnMajor && orderC == ViennaCLRowMajor) { viennacl::matrix_base matA(A, viennacl::CUDA_MEMORY, A_size1, offA_row, incA_row, lda, A_size2, offA_col, incA_col, k); viennacl::matrix_base matB(B, viennacl::CUDA_MEMORY, B_size1, offB_row, incB_row, ldb, B_size2, offB_col, incB_col, n); viennacl::matrix_base matC(C, viennacl::CUDA_MEMORY, m, offC_row, incC_row, m, n, offC_col, incC_col, ldc); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLColumnMajor && orderB == ViennaCLColumnMajor && orderC == ViennaCLColumnMajor) { viennacl::matrix_base matA(A, viennacl::CUDA_MEMORY, A_size1, 
offA_row, incA_row, lda, A_size2, offA_col, incA_col, k); viennacl::matrix_base matB(B, viennacl::CUDA_MEMORY, B_size1, offB_row, incB_row, ldb, B_size2, offB_col, incB_col, n); viennacl::matrix_base matC(C, viennacl::CUDA_MEMORY, m, offC_row, incC_row, ldc, n, offC_col, incC_col, n); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } return ViennaCLSuccess; } } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, float alpha, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, float *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, float beta, float *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) { return detail::ViennaCLCUDAgemm_impl(backend, orderA, transA, orderB, transB, orderC, m, n, k, alpha, A, offA_row, offA_col, incA_row, incA_col, lda, B, offB_row, offB_col, incB_row, incB_col, ldb, beta, C, offC_row, offC_col, incC_row, incC_col, ldc); } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, double alpha, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, double *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, double beta, double *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) { return detail::ViennaCLCUDAgemm_impl(backend, orderA, transA, orderB, transB, orderC, m, n, k, alpha, A, 
offA_row, offA_col, incA_row, incA_col, lda, B, offB_row, offB_col, incB_row, incB_col, ldb, beta, C, offC_row, offC_col, incC_row, incC_col, ldc); } #endif ViennaCL-1.5.1-src/libviennacl/src/blas1_cuda.cu000644 001750 001750 00000024663 12267307531 021405 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" //include the generic inner product functions of ViennaCL #include "viennacl/linalg/inner_prod.hpp" //include the generic norm functions of ViennaCL #include "viennacl/linalg/norm_1.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/norm_inf.hpp" #ifdef VIENNACL_WITH_CUDA // IxAMAX VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDAiSamax(ViennaCLBackend /*backend*/, ViennaCLInt n, ViennaCLInt *index, float *x, ViennaCLInt offx, ViennaCLInt incx) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); *index = static_cast(viennacl::linalg::index_norm_inf(v1)); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDAiDamax(ViennaCLBackend /*backend*/, ViennaCLInt n, ViennaCLInt *index, double *x, ViennaCLInt offx, ViennaCLInt incx) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); *index = 
static_cast(viennacl::linalg::index_norm_inf(v1)); return ViennaCLSuccess; } // xASUM VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASasum(ViennaCLBackend /*backend*/, ViennaCLInt n, float *alpha, float *x, ViennaCLInt offx, ViennaCLInt incx) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); *alpha = viennacl::linalg::norm_1(v1); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADasum(ViennaCLBackend /*backend*/, ViennaCLInt n, double *alpha, double *x, ViennaCLInt offx, ViennaCLInt incx) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); *alpha = viennacl::linalg::norm_1(v1); return ViennaCLSuccess; } // xAXPY VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASaxpy(ViennaCLBackend /*backend*/, ViennaCLInt n, float alpha, float *x, ViennaCLInt offx, ViennaCLInt incx, float *y, ViennaCLInt offy, ViennaCLInt incy) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::CUDA_MEMORY, n, offy, incy); v2 += alpha * v1; return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADaxpy(ViennaCLBackend /*backend*/, ViennaCLInt n, double alpha, double *x, ViennaCLInt offx, ViennaCLInt incx, double *y, ViennaCLInt offy, ViennaCLInt incy) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::CUDA_MEMORY, n, offy, incy); v2 += alpha * v1; return ViennaCLSuccess; } // xCOPY VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDAScopy(ViennaCLBackend /*backend*/, ViennaCLInt n, float *x, ViennaCLInt offx, ViennaCLInt incx, float *y, ViennaCLInt offy, ViennaCLInt incy) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::CUDA_MEMORY, n, offy, incy); v2 = v1; return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADcopy(ViennaCLBackend /*backend*/, ViennaCLInt n, double *x, ViennaCLInt offx, ViennaCLInt 
incx, double *y, ViennaCLInt offy, ViennaCLInt incy) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::CUDA_MEMORY, n, offy, incy); v2 = v1; return ViennaCLSuccess; } // xDOT VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASdot(ViennaCLBackend /*backend*/, ViennaCLInt n, float *alpha, float *x, ViennaCLInt offx, ViennaCLInt incx, float *y, ViennaCLInt offy, ViennaCLInt incy) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::CUDA_MEMORY, n, offy, incy); *alpha = viennacl::linalg::inner_prod(v1, v2); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADdot(ViennaCLBackend /*backend*/, ViennaCLInt n, double *alpha, double *x, ViennaCLInt offx, ViennaCLInt incx, double *y, ViennaCLInt offy, ViennaCLInt incy) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::CUDA_MEMORY, n, offy, incy); *alpha = viennacl::linalg::inner_prod(v1, v2); return ViennaCLSuccess; } // xNRM2 VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASnrm2(ViennaCLBackend /*backend*/, ViennaCLInt n, float *alpha, float *x, ViennaCLInt offx, ViennaCLInt incx) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); *alpha = viennacl::linalg::norm_2(v1); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADnrm2(ViennaCLBackend /*backend*/, ViennaCLInt n, double *alpha, double *x, ViennaCLInt offx, ViennaCLInt incx) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); *alpha = viennacl::linalg::norm_2(v1); return ViennaCLSuccess; } // xROT VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASrot(ViennaCLBackend /*backend*/, ViennaCLInt n, float *x, ViennaCLInt offx, ViennaCLInt incx, float *y, ViennaCLInt offy, ViennaCLInt incy, float c, float s) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::vector_base v2(y, 
viennacl::CUDA_MEMORY, n, offy, incy); viennacl::linalg::plane_rotation(v1, v2, c, s); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADrot(ViennaCLBackend /*backend*/, ViennaCLInt n, double *x, ViennaCLInt offx, ViennaCLInt incx, double *y, ViennaCLInt offy, ViennaCLInt incy, double c, double s) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::CUDA_MEMORY, n, offy, incy); viennacl::linalg::plane_rotation(v1, v2, c, s); return ViennaCLSuccess; } // xSCAL VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASscal(ViennaCLBackend /*backend*/, ViennaCLInt n, float alpha, float *x, ViennaCLInt offx, ViennaCLInt incx) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); v1 *= alpha; return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADscal(ViennaCLBackend /*backend*/, ViennaCLInt n, double alpha, double *x, ViennaCLInt offx, ViennaCLInt incx) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); v1 *= alpha; return ViennaCLSuccess; } // xSWAP VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASswap(ViennaCLBackend /*backend*/, ViennaCLInt n, float *x, ViennaCLInt offx, ViennaCLInt incx, float *y, ViennaCLInt offy, ViennaCLInt incy) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::CUDA_MEMORY, n, offy, incy); viennacl::swap(v1, v2); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADswap(ViennaCLBackend /*backend*/, ViennaCLInt n, double *x, ViennaCLInt offx, ViennaCLInt incx, double *y, ViennaCLInt offy, ViennaCLInt incy) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::CUDA_MEMORY, n, offy, incy); viennacl::swap(v1, v2); return ViennaCLSuccess; } #endif ViennaCL-1.5.1-src/libviennacl/src/blas3_opencl.cu000644 001750 001750 00000035170 12267307531 021746 0ustar00rupprupp000000 
000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" #include "blas3.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/prod.hpp" #ifdef VIENNACL_WITH_OPENCL // // xGEMV // namespace detail { template ViennaCLStatus ViennaCLOpenCLgemm_impl(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, NumericT alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, NumericT beta, cl_mem C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) { ViennaCLInt A_size1 = (transA == ViennaCLTrans) ? k : m; ViennaCLInt A_size2 = (transA == ViennaCLTrans) ? m : k; ViennaCLInt B_size1 = (transB == ViennaCLTrans) ? n : k; ViennaCLInt B_size2 = (transB == ViennaCLTrans) ? 
k : n; /////// A row-major if (orderA == ViennaCLRowMajor && orderB == ViennaCLRowMajor && orderC == ViennaCLRowMajor) { viennacl::matrix_base matA(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), A_size1, offA_row, incA_row, m, A_size2, offA_col, incA_col, lda); viennacl::matrix_base matB(B, viennacl::ocl::get_context(backend->opencl_backend.context_id), B_size1, offB_row, incB_row, k, B_size2, offB_col, incB_col, ldb); viennacl::matrix_base matC(C, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offC_row, incC_row, m, n, offC_col, incC_col, ldc); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLRowMajor && orderB == ViennaCLRowMajor && orderC == ViennaCLColumnMajor) { viennacl::matrix_base matA(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), A_size1, offA_row, incA_row, m, A_size2, offA_col, incA_col, lda); viennacl::matrix_base matB(B, viennacl::ocl::get_context(backend->opencl_backend.context_id), B_size1, offB_row, incB_row, k, B_size2, offB_col, incB_col, ldb); viennacl::matrix_base matC(C, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offC_row, incC_row, ldc, n, offC_col, incC_col, n); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLRowMajor && orderB == ViennaCLColumnMajor && orderC == ViennaCLRowMajor) { viennacl::matrix_base matA(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), A_size1, offA_row, incA_row, m, A_size2, offA_col, incA_col, lda); viennacl::matrix_base matB(B, viennacl::ocl::get_context(backend->opencl_backend.context_id), B_size1, offB_row, incB_row, ldb, B_size2, offB_col, incB_col, n); viennacl::matrix_base matC(C, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offC_row, incC_row, m, n, offC_col, incC_col, ldc); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLRowMajor && orderB 
== ViennaCLColumnMajor && orderC == ViennaCLColumnMajor) { viennacl::matrix_base matA(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), A_size1, offA_row, incA_row, m, A_size2, offA_col, incA_col, lda); viennacl::matrix_base matB(B, viennacl::ocl::get_context(backend->opencl_backend.context_id), B_size1, offB_row, incB_row, ldb, B_size2, offB_col, incB_col, n); viennacl::matrix_base matC(C, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offC_row, incC_row, ldc, n, offC_col, incC_col, n); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } /////// A column-major else if (orderA == ViennaCLColumnMajor && orderB == ViennaCLRowMajor && orderC == ViennaCLRowMajor) { viennacl::matrix_base matA(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), A_size1, offA_row, incA_row, lda, A_size2, offA_col, incA_col, k); viennacl::matrix_base matB(B, viennacl::ocl::get_context(backend->opencl_backend.context_id), B_size1, offB_row, incB_row, k, B_size2, offB_col, incB_col, ldb); viennacl::matrix_base matC(C, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offC_row, incC_row, m, n, offC_col, incC_col, ldc); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLColumnMajor && orderB == ViennaCLRowMajor && orderC == ViennaCLColumnMajor) { viennacl::matrix_base matA(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), A_size1, offA_row, incA_row, lda, A_size2, offA_col, incA_col, k); viennacl::matrix_base matB(B, viennacl::ocl::get_context(backend->opencl_backend.context_id), B_size1, offB_row, incB_row, k, B_size2, offB_col, incB_col, ldb); viennacl::matrix_base matC(C, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offC_row, incC_row, ldc, n, offC_col, incC_col, n); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLColumnMajor && orderB == ViennaCLColumnMajor && orderC 
== ViennaCLRowMajor) { viennacl::matrix_base matA(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), A_size1, offA_row, incA_row, lda, A_size2, offA_col, incA_col, k); viennacl::matrix_base matB(B, viennacl::ocl::get_context(backend->opencl_backend.context_id), B_size1, offB_row, incB_row, ldb, B_size2, offB_col, incB_col, n); viennacl::matrix_base matC(C, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offC_row, incC_row, m, n, offC_col, incC_col, ldc); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLColumnMajor && orderB == ViennaCLColumnMajor && orderC == ViennaCLColumnMajor) { viennacl::matrix_base matA(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), A_size1, offA_row, incA_row, lda, A_size2, offA_col, incA_col, k); viennacl::matrix_base matB(B, viennacl::ocl::get_context(backend->opencl_backend.context_id), B_size1, offB_row, incB_row, ldb, B_size2, offB_col, incB_col, n); viennacl::matrix_base matC(C, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offC_row, incC_row, ldc, n, offC_col, incC_col, n); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } return ViennaCLSuccess; } } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, float alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, float beta, cl_mem C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) { return detail::ViennaCLOpenCLgemm_impl(backend, orderA, transA, orderB, transB, orderC, m, n, k, alpha, A, offA_row, offA_col, 
incA_row, incA_col, lda, B, offB_row, offB_col, incB_row, incB_col, ldb, beta, C, offC_row, offC_col, incC_row, incC_col, ldc); } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, double alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, double beta, cl_mem C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) { return detail::ViennaCLOpenCLgemm_impl(backend, orderA, transA, orderB, transB, orderC, m, n, k, alpha, A, offA_row, offA_col, incA_row, incA_col, lda, B, offB_row, offB_col, incB_row, incB_col, ldb, beta, C, offC_row, offC_col, incC_row, incC_col, ldc); } #endif ViennaCL-1.5.1-src/libviennacl/src/blas1_opencl.cpp000644 001750 001750 00000027120 12267307531 022113 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" //include the generic inner product functions of ViennaCL #include "viennacl/linalg/inner_prod.hpp" //include the generic norm functions of ViennaCL #include "viennacl/linalg/norm_1.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/norm_inf.hpp" #ifdef VIENNACL_WITH_OPENCL // IxAMAX VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLiSamax(ViennaCLBackend backend, ViennaCLInt n, ViennaCLInt *index, cl_mem x, ViennaCLInt offx, ViennaCLInt incx) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); *index = static_cast(viennacl::linalg::index_norm_inf(v1)); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLiDamax(ViennaCLBackend backend, ViennaCLInt n, ViennaCLInt *index, cl_mem x, ViennaCLInt offx, ViennaCLInt incx) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); *index = static_cast(viennacl::linalg::index_norm_inf(v1)); return ViennaCLSuccess; } // xASUM VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSasum(ViennaCLBackend backend, ViennaCLInt n, float *alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); *alpha = viennacl::linalg::norm_1(v1); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus 
ViennaCLOpenCLDasum(ViennaCLBackend backend, ViennaCLInt n, double *alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); *alpha = viennacl::linalg::norm_1(v1); return ViennaCLSuccess; } // xAXPY VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSaxpy(ViennaCLBackend backend, ViennaCLInt n, float alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, n, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); v2 += alpha * v1; return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDaxpy(ViennaCLBackend backend, ViennaCLInt n, double alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, n, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); v2 += alpha * v1; return ViennaCLSuccess; } // xCOPY VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLScopy(ViennaCLBackend backend, ViennaCLInt n, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, n, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); v2 = v1; return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDcopy(ViennaCLBackend backend, ViennaCLInt n, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy) { viennacl::vector_base v1(x, n, offx, incx, 
viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, n, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); v2 = v1; return ViennaCLSuccess; } // xDOT VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSdot(ViennaCLBackend backend, ViennaCLInt n, float *alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, n, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); *alpha = viennacl::linalg::inner_prod(v1, v2); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDdot(ViennaCLBackend backend, ViennaCLInt n, double *alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, n, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); *alpha = viennacl::linalg::inner_prod(v1, v2); return ViennaCLSuccess; } // xNRM2 VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSnrm2(ViennaCLBackend backend, ViennaCLInt n, float *alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); *alpha = viennacl::linalg::norm_2(v1); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDnrm2(ViennaCLBackend backend, ViennaCLInt n, double *alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); *alpha = viennacl::linalg::norm_2(v1); return ViennaCLSuccess; } // xROT VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSrot(ViennaCLBackend backend, 
ViennaCLInt n, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy, float c, float s) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, n, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::linalg::plane_rotation(v1, v2, c, s); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDrot(ViennaCLBackend backend, ViennaCLInt n, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy, double c, double s) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, n, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::linalg::plane_rotation(v1, v2, c, s); return ViennaCLSuccess; } // xSCAL VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSscal(ViennaCLBackend backend, ViennaCLInt n, float alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); v1 *= alpha; return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDscal(ViennaCLBackend backend, ViennaCLInt n, double alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); v1 *= alpha; return ViennaCLSuccess; } // xSWAP VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSswap(ViennaCLBackend backend, ViennaCLInt n, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, n, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::swap(v1, v2); 
return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDswap(ViennaCLBackend backend, ViennaCLInt n, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, n, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::swap(v1, v2); return ViennaCLSuccess; } #endif ViennaCL-1.5.1-src/libviennacl/src/blas3_host.cu000644 001750 001750 00000033037 12267307531 021443 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" #include "blas3.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/prod.hpp" // // xGEMV // namespace detail { template ViennaCLStatus ViennaCLHostgemm_impl(ViennaCLBackend /*backend*/, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, NumericT alpha, NumericT *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, NumericT *B, ViennaCLInt 
offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, NumericT beta, NumericT *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) { ViennaCLInt A_size1 = (transA == ViennaCLTrans) ? k : m; ViennaCLInt A_size2 = (transA == ViennaCLTrans) ? m : k; ViennaCLInt B_size1 = (transB == ViennaCLTrans) ? n : k; ViennaCLInt B_size2 = (transB == ViennaCLTrans) ? k : n; /////// A row-major if (orderA == ViennaCLRowMajor && orderB == ViennaCLRowMajor && orderC == ViennaCLRowMajor) { viennacl::matrix_base matA(A, viennacl::MAIN_MEMORY, A_size1, offA_row, incA_row, m, A_size2, offA_col, incA_col, lda); viennacl::matrix_base matB(B, viennacl::MAIN_MEMORY, B_size1, offB_row, incB_row, k, B_size2, offB_col, incB_col, ldb); viennacl::matrix_base matC(C, viennacl::MAIN_MEMORY, m, offC_row, incC_row, m, n, offC_col, incC_col, ldc); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLRowMajor && orderB == ViennaCLRowMajor && orderC == ViennaCLColumnMajor) { viennacl::matrix_base matA(A, viennacl::MAIN_MEMORY, A_size1, offA_row, incA_row, m, A_size2, offA_col, incA_col, lda); viennacl::matrix_base matB(B, viennacl::MAIN_MEMORY, B_size1, offB_row, incB_row, k, B_size2, offB_col, incB_col, ldb); viennacl::matrix_base matC(C, viennacl::MAIN_MEMORY, m, offC_row, incC_row, ldc, n, offC_col, incC_col, n); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLRowMajor && orderB == ViennaCLColumnMajor && orderC == ViennaCLRowMajor) { viennacl::matrix_base matA(A, viennacl::MAIN_MEMORY, A_size1, offA_row, incA_row, m, A_size2, offA_col, incA_col, lda); viennacl::matrix_base matB(B, viennacl::MAIN_MEMORY, B_size1, offB_row, incB_row, ldb, B_size2, offB_col, incB_col, n); viennacl::matrix_base matC(C, viennacl::MAIN_MEMORY, m, offC_row, incC_row, m, n, offC_col, incC_col, ldc); detail::gemm_dispatch(alpha, matA, 
transA, matB, transB, beta, matC); } else if (orderA == ViennaCLRowMajor && orderB == ViennaCLColumnMajor && orderC == ViennaCLColumnMajor) { viennacl::matrix_base matA(A, viennacl::MAIN_MEMORY, A_size1, offA_row, incA_row, m, A_size2, offA_col, incA_col, lda); viennacl::matrix_base matB(B, viennacl::MAIN_MEMORY, B_size1, offB_row, incB_row, ldb, B_size2, offB_col, incB_col, n); viennacl::matrix_base matC(C, viennacl::MAIN_MEMORY, m, offC_row, incC_row, ldc, n, offC_col, incC_col, n); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } /////// A column-major else if (orderA == ViennaCLColumnMajor && orderB == ViennaCLRowMajor && orderC == ViennaCLRowMajor) { viennacl::matrix_base matA(A, viennacl::MAIN_MEMORY, A_size1, offA_row, incA_row, lda, A_size2, offA_col, incA_col, k); viennacl::matrix_base matB(B, viennacl::MAIN_MEMORY, B_size1, offB_row, incB_row, k, B_size2, offB_col, incB_col, ldb); viennacl::matrix_base matC(C, viennacl::MAIN_MEMORY, m, offC_row, incC_row, m, n, offC_col, incC_col, ldc); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLColumnMajor && orderB == ViennaCLRowMajor && orderC == ViennaCLColumnMajor) { viennacl::matrix_base matA(A, viennacl::MAIN_MEMORY, A_size1, offA_row, incA_row, lda, A_size2, offA_col, incA_col, k); viennacl::matrix_base matB(B, viennacl::MAIN_MEMORY, B_size1, offB_row, incB_row, k, B_size2, offB_col, incB_col, ldb); viennacl::matrix_base matC(C, viennacl::MAIN_MEMORY, m, offC_row, incC_row, ldc, n, offC_col, incC_col, n); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLColumnMajor && orderB == ViennaCLColumnMajor && orderC == ViennaCLRowMajor) { viennacl::matrix_base matA(A, viennacl::MAIN_MEMORY, A_size1, offA_row, incA_row, lda, A_size2, offA_col, incA_col, k); viennacl::matrix_base matB(B, viennacl::MAIN_MEMORY, B_size1, offB_row, incB_row, ldb, B_size2, offB_col, incB_col, n); viennacl::matrix_base 
matC(C, viennacl::MAIN_MEMORY, m, offC_row, incC_row, m, n, offC_col, incC_col, ldc); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLColumnMajor && orderB == ViennaCLColumnMajor && orderC == ViennaCLColumnMajor) { viennacl::matrix_base matA(A, viennacl::MAIN_MEMORY, A_size1, offA_row, incA_row, lda, A_size2, offA_col, incA_col, k); viennacl::matrix_base matB(B, viennacl::MAIN_MEMORY, B_size1, offB_row, incB_row, ldb, B_size2, offB_col, incB_col, n); viennacl::matrix_base matC(C, viennacl::MAIN_MEMORY, m, offC_row, incC_row, ldc, n, offC_col, incC_col, n); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } return ViennaCLSuccess; } } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, float alpha, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, float *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, float beta, float *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) { return detail::ViennaCLHostgemm_impl(backend, orderA, transA, orderB, transB, orderC, m, n, k, alpha, A, offA_row, offA_col, incA_row, incA_col, lda, B, offB_row, offB_col, incB_row, incB_col, ldb, beta, C, offC_row, offC_col, incC_row, incC_col, ldc); } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, double alpha, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, double *B, ViennaCLInt offB_row, 
ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, double beta, double *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) { return detail::ViennaCLHostgemm_impl(backend, orderA, transA, orderB, transB, orderC, m, n, k, alpha, A, offA_row, offA_col, incA_row, incA_col, lda, B, offB_row, offB_col, incB_row, incB_col, ldb, beta, C, offC_row, offC_col, incC_row, incC_col, ldc); } ViennaCL-1.5.1-src/libviennacl/src/blas2_opencl.cu000644 001750 001750 00000033404 12267307531 021743 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/prod.hpp" // xGEMV VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, float alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, float beta, cl_mem y, ViennaCLInt offy, ViennaCLInt incy) { if (order == ViennaCLRowMajor) { 
viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, m, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offA_row, incA_row, m, n, offA_col, incA_col, lda); v2 *= beta; if (transA == ViennaCLTrans) v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha * viennacl::linalg::prod(mat, v1); } else { viennacl::vector_base v1(x, n, offx, incx); viennacl::vector_base v2(y, m, offy, incy); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offA_row, incA_row, lda, n, offA_col, incA_col, n); v2 *= beta; if (transA == ViennaCLTrans) v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha * viennacl::linalg::prod(mat, v1); } return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, double alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, double beta, cl_mem y, ViennaCLInt offy, ViennaCLInt incy) { if (order == ViennaCLRowMajor) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, m, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offA_row, incA_row, m, n, offA_col, incA_col, lda); v2 *= beta; if (transA == ViennaCLTrans) v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha * viennacl::linalg::prod(mat, v1); } else { viennacl::vector_base v1(x, n, offx, incx, 
viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, m, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offA_row, incA_row, lda, n, offA_col, incA_col, n); v2 *= beta; if (transA == ViennaCLTrans) v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha * viennacl::linalg::prod(mat, v1); } return ViennaCLSuccess; } // xTRSV VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLStrsv(ViennaCLBackend backend, ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt n, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem x, ViennaCLInt offx, ViennaCLInt incx) { if (order == ViennaCLRowMajor) { viennacl::vector_base v(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), n, offA_row, incA_row, n, n, offA_col, incA_col, lda); if (transA == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); } } else { viennacl::vector_base v(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), n, offA_row, incA_row, lda, n, offA_col, incA_col, n); if (transA == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); else 
viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); } } return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDtrsv(ViennaCLBackend backend, ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt n, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem x, ViennaCLInt offx, ViennaCLInt incx) { if (order == ViennaCLRowMajor) { viennacl::vector_base v(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), n, offA_row, incA_row, n, n, offA_col, incA_col, lda); if (transA == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); } } else { viennacl::vector_base v(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), n, offA_row, incA_row, lda, n, offA_col, incA_col, n); if (transA == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); else 
viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); } } return ViennaCLSuccess; } // xGER VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSger(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLInt m, ViennaCLInt n, float alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda) { if (order == ViennaCLRowMajor) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, m, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offA_row, incA_row, m, n, offA_col, incA_col, lda); mat += alpha * viennacl::linalg::outer_prod(v1, v2); } else { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, m, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offA_row, incA_row, lda, n, offA_col, incA_col, n); mat += alpha * viennacl::linalg::outer_prod(v1, v2); } return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDger(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLInt m, ViennaCLInt n, double alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda) { if (order == ViennaCLRowMajor) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, m, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); 
viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offA_row, incA_row, m, n, offA_col, incA_col, lda); mat += alpha * viennacl::linalg::outer_prod(v1, v2); } else { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, m, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offA_row, incA_row, lda, n, offA_col, incA_col, n); mat += alpha * viennacl::linalg::outer_prod(v1, v2); } return ViennaCLSuccess; } ViennaCL-1.5.1-src/libviennacl/src/blas3.cpp000644 001750 001750 00000175613 12267307531 020570 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" #include "init_matrix.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/prod.hpp" // GEMV VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLgemm(ViennaCLHostScalar alpha, ViennaCLMatrix A, ViennaCLMatrix B, ViennaCLHostScalar beta, ViennaCLMatrix C) { viennacl::backend::mem_handle A_handle; viennacl::backend::mem_handle B_handle; viennacl::backend::mem_handle C_handle; if (init_matrix(A_handle, A) != ViennaCLSuccess) return ViennaCLGenericFailure; if (init_matrix(B_handle, B) != ViennaCLSuccess) return ViennaCLGenericFailure; if (init_matrix(C_handle, C) != ViennaCLSuccess) return ViennaCLGenericFailure; switch (A->precision) { case ViennaCLFloat: { if (A->order == ViennaCLRowMajor && B->order == ViennaCLRowMajor && C->order == ViennaCLRowMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == 
ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_float, beta->value_float); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLRowMajor && B->order == ViennaCLRowMajor && C->order == ViennaCLColumnMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_float, beta->value_float); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLRowMajor && B->order == ViennaCLColumnMajor && C->order == ViennaCLRowMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, 
A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_float, beta->value_float); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLRowMajor && B->order == ViennaCLColumnMajor && C->order == ViennaCLColumnMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans 
== ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_float, beta->value_float); else return ViennaCLGenericFailure; } if (A->order == ViennaCLColumnMajor && B->order == ViennaCLRowMajor && C->order == ViennaCLRowMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_float, beta->value_float); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLColumnMajor && B->order == ViennaCLRowMajor && C->order == ViennaCLColumnMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base 
mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_float, beta->value_float); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLColumnMajor && B->order == ViennaCLColumnMajor && C->order == ViennaCLRowMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) 
viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_float, beta->value_float); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLColumnMajor && B->order == ViennaCLColumnMajor && C->order == ViennaCLColumnMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_float, beta->value_float); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_float, beta->value_float); else return ViennaCLGenericFailure; } else return ViennaCLGenericFailure; return ViennaCLSuccess; } case ViennaCLDouble: { if (A->order == ViennaCLRowMajor && B->order == ViennaCLRowMajor && C->order == ViennaCLRowMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, 
C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_double, beta->value_double); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLRowMajor && B->order == ViennaCLRowMajor && C->order == ViennaCLColumnMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_double, beta->value_double); else return 
ViennaCLGenericFailure; } else if (A->order == ViennaCLRowMajor && B->order == ViennaCLColumnMajor && C->order == ViennaCLRowMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_double, beta->value_double); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLRowMajor && B->order == ViennaCLColumnMajor && C->order == ViennaCLColumnMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_double, 
beta->value_double); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_double, beta->value_double); else return ViennaCLGenericFailure; } if (A->order == ViennaCLColumnMajor && B->order == ViennaCLRowMajor && C->order == ViennaCLRowMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_double, beta->value_double); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLColumnMajor && B->order == ViennaCLRowMajor && C->order == ViennaCLColumnMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, 
A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_double, beta->value_double); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLColumnMajor && B->order == ViennaCLColumnMajor && C->order == ViennaCLRowMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_double, 
beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_double, beta->value_double); else return ViennaCLGenericFailure; } else if (A->order == ViennaCLColumnMajor && B->order == ViennaCLColumnMajor && C->order == ViennaCLColumnMajor) { viennacl::matrix_base mat_A(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); viennacl::matrix_base mat_B(B_handle, B->size1, B->start1, B->stride1, B->internal_size1, B->size2, B->start2, B->stride2, B->internal_size2); viennacl::matrix_base mat_C(C_handle, C->size1, C->start1, C->stride1, C->internal_size1, C->size2, C->start2, C->stride2, C->internal_size2); if (A->trans == ViennaCLTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(mat_A), mat_B, mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLTrans) viennacl::linalg::prod_impl(mat_A, viennacl::trans(mat_B), mat_C, alpha->value_double, beta->value_double); else if (A->trans == ViennaCLNoTrans && B->trans == ViennaCLNoTrans) viennacl::linalg::prod_impl(mat_A, mat_B, mat_C, alpha->value_double, beta->value_double); else return ViennaCLGenericFailure; } else return ViennaCLGenericFailure; return ViennaCLSuccess; } default: return ViennaCLGenericFailure; } }

// xTRSM

namespace
{
  /** @brief Runs one in-place triangular solve with a fixed solver tag, honouring the transposition flags.
  *
  * @param mat_A   Triangular system matrix (only read).
  * @param transA  ViennaCLTrans to solve with the transpose of mat_A, ViennaCLNoTrans to use mat_A as given.
  * @param mat_B   Right hand side block; overwritten with the result of the solve.
  * @param transB  ViennaCLTrans to operate on the transpose of mat_B, ViennaCLNoTrans to use mat_B as given.
  * @param tag     Solver tag object forwarded unchanged to viennacl::linalg::inplace_solve().
  * @return ViennaCLSuccess after the solve was issued, ViennaCLGenericFailure if either flag is not a known value.
  */
  template <typename MatrixA, typename MatrixB, typename SolverTagT>
  ViennaCLStatus trsm_solve(MatrixA const & mat_A, ViennaCLTranspose transA,
                            MatrixB       & mat_B, ViennaCLTranspose transB,
                            SolverTagT tag)
  {
    if (transA == ViennaCLTrans && transB == ViennaCLTrans)
      viennacl::linalg::inplace_solve(viennacl::trans(mat_A), viennacl::trans(mat_B), tag);
    else if (transA == ViennaCLTrans && transB == ViennaCLNoTrans)
      viennacl::linalg::inplace_solve(viennacl::trans(mat_A), mat_B, tag);
    else if (transA == ViennaCLNoTrans && transB == ViennaCLTrans)
      // A must NOT be transposed here; the previous code passed trans(mat_A) in this branch.
      viennacl::linalg::inplace_solve(mat_A, viennacl::trans(mat_B), tag);
    else if (transA == ViennaCLNoTrans && transB == ViennaCLNoTrans)
      viennacl::linalg::inplace_solve(mat_A, mat_B, tag);
    else
      return ViennaCLGenericFailure; // unknown flag: report it instead of silently doing nothing

    return ViennaCLSuccess;
  }

  /** @brief Maps the (uplo, diag) pair onto the matching solver tag and forwards to trsm_solve().
  *
  * @param mat_A   Triangular system matrix.
  * @param transA  Transposition flag for mat_A.
  * @param mat_B   Right hand side block; overwritten by the solve.
  * @param transB  Transposition flag for mat_B.
  * @param uplo    ViennaCLUpper or ViennaCLLower.
  * @param diag    ViennaCLNonUnit or ViennaCLUnit.
  * @return Status of trsm_solve(), or ViennaCLGenericFailure for an unknown (uplo, diag) pair.
  */
  template <typename MatrixA, typename MatrixB>
  ViennaCLStatus trsm_dispatch(MatrixA const & mat_A, ViennaCLTranspose transA,
                               MatrixB       & mat_B, ViennaCLTranspose transB,
                               ViennaCLUplo uplo, ViennaCLDiag diag)
  {
    if (uplo == ViennaCLUpper && diag == ViennaCLNonUnit)
      return trsm_solve(mat_A, transA, mat_B, transB, viennacl::linalg::upper_tag());
    if (uplo == ViennaCLUpper && diag == ViennaCLUnit)
      return trsm_solve(mat_A, transA, mat_B, transB, viennacl::linalg::unit_upper_tag());
    if (uplo == ViennaCLLower && diag == ViennaCLNonUnit)
      return trsm_solve(mat_A, transA, mat_B, transB, viennacl::linalg::lower_tag());
    if (uplo == ViennaCLLower && diag == ViennaCLUnit)
      return trsm_solve(mat_A, transA, mat_B, transB, viennacl::linalg::unit_lower_tag());

    return ViennaCLGenericFailure;
  }

  /** @brief Wraps both operands in matrix_base views of element type NumericT and dispatches on their storage order.
  *
  * NOTE(review): the template arguments of viennacl::matrix_base were not visible in the text this
  * function was reviewed from. <NumericT> (row-major) and <NumericT, viennacl::column_major> are
  * assumed here - confirm against the matrix_base declaration before merging.
  *
  * @param A_handle  Memory handle already initialised for A.
  * @param A         Descriptor of the triangular system matrix.
  * @param B_handle  Memory handle already initialised for B.
  * @param B         Descriptor of the right hand side block.
  * @param uplo      ViennaCLUpper or ViennaCLLower.
  * @param diag      ViennaCLNonUnit or ViennaCLUnit.
  * @return Status of trsm_dispatch(), or ViennaCLGenericFailure for an unknown order combination.
  */
  template <typename NumericT>
  ViennaCLStatus trsm_for_precision(viennacl::backend::mem_handle & A_handle, ViennaCLMatrix A,
                                    viennacl::backend::mem_handle & B_handle, ViennaCLMatrix B,
                                    ViennaCLUplo uplo, ViennaCLDiag diag)
  {
    if (A->order == ViennaCLRowMajor && B->order == ViennaCLRowMajor)
    {
      viennacl::matrix_base<NumericT> mat_A(A_handle,
                                            A->size1, A->start1, A->stride1, A->internal_size1,
                                            A->size2, A->start2, A->stride2, A->internal_size2);
      viennacl::matrix_base<NumericT> mat_B(B_handle,
                                            B->size1, B->start1, B->stride1, B->internal_size1,
                                            B->size2, B->start2, B->stride2, B->internal_size2);
      return trsm_dispatch(mat_A, A->trans, mat_B, B->trans, uplo, diag);
    }
    else if (A->order == ViennaCLRowMajor && B->order == ViennaCLColumnMajor)
    {
      viennacl::matrix_base<NumericT> mat_A(A_handle,
                                            A->size1, A->start1, A->stride1, A->internal_size1,
                                            A->size2, A->start2, A->stride2, A->internal_size2);
      viennacl::matrix_base<NumericT, viennacl::column_major> mat_B(B_handle,
                                            B->size1, B->start1, B->stride1, B->internal_size1,
                                            B->size2, B->start2, B->stride2, B->internal_size2);
      return trsm_dispatch(mat_A, A->trans, mat_B, B->trans, uplo, diag);
    }
    else if (A->order == ViennaCLColumnMajor && B->order == ViennaCLRowMajor)
    {
      viennacl::matrix_base<NumericT, viennacl::column_major> mat_A(A_handle,
                                            A->size1, A->start1, A->stride1, A->internal_size1,
                                            A->size2, A->start2, A->stride2, A->internal_size2);
      viennacl::matrix_base<NumericT> mat_B(B_handle,
                                            B->size1, B->start1, B->stride1, B->internal_size1,
                                            B->size2, B->start2, B->stride2, B->internal_size2);
      return trsm_dispatch(mat_A, A->trans, mat_B, B->trans, uplo, diag);
    }
    else if (A->order == ViennaCLColumnMajor && B->order == ViennaCLColumnMajor)
    {
      viennacl::matrix_base<NumericT, viennacl::column_major> mat_A(A_handle,
                                            A->size1, A->start1, A->stride1, A->internal_size1,
                                            A->size2, A->start2, A->stride2, A->internal_size2);
      viennacl::matrix_base<NumericT, viennacl::column_major> mat_B(B_handle,
                                            B->size1, B->start1, B->stride1, B->internal_size1,
                                            B->size2, B->start2, B->stride2, B->internal_size2);
      return trsm_dispatch(mat_A, A->trans, mat_B, B->trans, uplo, diag);
    }

    // Same convention as ViennaCLgemm: an unknown order combination is an error, not a silent no-op.
    return ViennaCLGenericFailure;
  }
}

/** @brief In-place triangular solve with multiple right hand sides: B is overwritten with the solution of op(A) * X = op(B).
*
* @param A     Triangular system matrix. Its precision, order and trans fields select the code path.
* @param uplo  ViennaCLUpper or ViennaCLLower: which triangle of A is used.
* @param diag  ViennaCLUnit selects the unit-diagonal solver tags, ViennaCLNonUnit the regular ones.
* @param B     Right hand side block; overwritten with the result. Its order and trans fields are honoured.
* @return ViennaCLSuccess if the solve was issued. ViennaCLGenericFailure if a memory handle could not be
*         initialised or if precision, order, trans, uplo or diag hold an unsupported value.
*
* NOTE(review): only A->precision selects the element type; B is presumably expected to use the same
* precision - confirm with callers whether a mismatch should be rejected explicitly.
*/
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLtrsm(ViennaCLMatrix A, ViennaCLUplo uplo, ViennaCLDiag diag, ViennaCLMatrix B)
{
  viennacl::backend::mem_handle A_handle;
  viennacl::backend::mem_handle B_handle;

  if (init_matrix(A_handle, A) != ViennaCLSuccess)
    return ViennaCLGenericFailure;

  if (init_matrix(B_handle, B) != ViennaCLSuccess)
    return ViennaCLGenericFailure;

  switch (A->precision)
  {
    case ViennaCLFloat:
      return trsm_for_precision<float>(A_handle, A, B_handle, B, uplo, diag);

    case ViennaCLDouble:
      return trsm_for_precision<double>(A_handle, A, B_handle, B, uplo, diag);

    default:
      return ViennaCLGenericFailure;
  }
}

ViennaCL-1.5.1-src/libviennacl/src/blas3_host.cpp000644 001750 001750 00000033037 12267307531 021616 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" #include "blas3.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/prod.hpp" // // xGEMV // namespace detail { template ViennaCLStatus ViennaCLHostgemm_impl(ViennaCLBackend /*backend*/, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, NumericT alpha, NumericT *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, NumericT *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, NumericT beta, NumericT *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) { ViennaCLInt A_size1 = (transA == ViennaCLTrans) ? k : m; ViennaCLInt A_size2 = (transA == ViennaCLTrans) ? m : k; ViennaCLInt B_size1 = (transB == ViennaCLTrans) ? n : k; ViennaCLInt B_size2 = (transB == ViennaCLTrans) ? 
k : n; /////// A row-major if (orderA == ViennaCLRowMajor && orderB == ViennaCLRowMajor && orderC == ViennaCLRowMajor) { viennacl::matrix_base matA(A, viennacl::MAIN_MEMORY, A_size1, offA_row, incA_row, m, A_size2, offA_col, incA_col, lda); viennacl::matrix_base matB(B, viennacl::MAIN_MEMORY, B_size1, offB_row, incB_row, k, B_size2, offB_col, incB_col, ldb); viennacl::matrix_base matC(C, viennacl::MAIN_MEMORY, m, offC_row, incC_row, m, n, offC_col, incC_col, ldc); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLRowMajor && orderB == ViennaCLRowMajor && orderC == ViennaCLColumnMajor) { viennacl::matrix_base matA(A, viennacl::MAIN_MEMORY, A_size1, offA_row, incA_row, m, A_size2, offA_col, incA_col, lda); viennacl::matrix_base matB(B, viennacl::MAIN_MEMORY, B_size1, offB_row, incB_row, k, B_size2, offB_col, incB_col, ldb); viennacl::matrix_base matC(C, viennacl::MAIN_MEMORY, m, offC_row, incC_row, ldc, n, offC_col, incC_col, n); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLRowMajor && orderB == ViennaCLColumnMajor && orderC == ViennaCLRowMajor) { viennacl::matrix_base matA(A, viennacl::MAIN_MEMORY, A_size1, offA_row, incA_row, m, A_size2, offA_col, incA_col, lda); viennacl::matrix_base matB(B, viennacl::MAIN_MEMORY, B_size1, offB_row, incB_row, ldb, B_size2, offB_col, incB_col, n); viennacl::matrix_base matC(C, viennacl::MAIN_MEMORY, m, offC_row, incC_row, m, n, offC_col, incC_col, ldc); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLRowMajor && orderB == ViennaCLColumnMajor && orderC == ViennaCLColumnMajor) { viennacl::matrix_base matA(A, viennacl::MAIN_MEMORY, A_size1, offA_row, incA_row, m, A_size2, offA_col, incA_col, lda); viennacl::matrix_base matB(B, viennacl::MAIN_MEMORY, B_size1, offB_row, incB_row, ldb, B_size2, offB_col, incB_col, n); viennacl::matrix_base matC(C, viennacl::MAIN_MEMORY, m, offC_row, 
incC_row, ldc, n, offC_col, incC_col, n); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } /////// A column-major else if (orderA == ViennaCLColumnMajor && orderB == ViennaCLRowMajor && orderC == ViennaCLRowMajor) { viennacl::matrix_base matA(A, viennacl::MAIN_MEMORY, A_size1, offA_row, incA_row, lda, A_size2, offA_col, incA_col, k); viennacl::matrix_base matB(B, viennacl::MAIN_MEMORY, B_size1, offB_row, incB_row, k, B_size2, offB_col, incB_col, ldb); viennacl::matrix_base matC(C, viennacl::MAIN_MEMORY, m, offC_row, incC_row, m, n, offC_col, incC_col, ldc); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLColumnMajor && orderB == ViennaCLRowMajor && orderC == ViennaCLColumnMajor) { viennacl::matrix_base matA(A, viennacl::MAIN_MEMORY, A_size1, offA_row, incA_row, lda, A_size2, offA_col, incA_col, k); viennacl::matrix_base matB(B, viennacl::MAIN_MEMORY, B_size1, offB_row, incB_row, k, B_size2, offB_col, incB_col, ldb); viennacl::matrix_base matC(C, viennacl::MAIN_MEMORY, m, offC_row, incC_row, ldc, n, offC_col, incC_col, n); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLColumnMajor && orderB == ViennaCLColumnMajor && orderC == ViennaCLRowMajor) { viennacl::matrix_base matA(A, viennacl::MAIN_MEMORY, A_size1, offA_row, incA_row, lda, A_size2, offA_col, incA_col, k); viennacl::matrix_base matB(B, viennacl::MAIN_MEMORY, B_size1, offB_row, incB_row, ldb, B_size2, offB_col, incB_col, n); viennacl::matrix_base matC(C, viennacl::MAIN_MEMORY, m, offC_row, incC_row, m, n, offC_col, incC_col, ldc); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLColumnMajor && orderB == ViennaCLColumnMajor && orderC == ViennaCLColumnMajor) { viennacl::matrix_base matA(A, viennacl::MAIN_MEMORY, A_size1, offA_row, incA_row, lda, A_size2, offA_col, incA_col, k); viennacl::matrix_base matB(B, viennacl::MAIN_MEMORY, B_size1, 
offB_row, incB_row, ldb, B_size2, offB_col, incB_col, n); viennacl::matrix_base matC(C, viennacl::MAIN_MEMORY, m, offC_row, incC_row, ldc, n, offC_col, incC_col, n); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } return ViennaCLSuccess; } } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, float alpha, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, float *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, float beta, float *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) { return detail::ViennaCLHostgemm_impl(backend, orderA, transA, orderB, transB, orderC, m, n, k, alpha, A, offA_row, offA_col, incA_row, incA_col, lda, B, offB_row, offB_col, incB_row, incB_col, ldb, beta, C, offC_row, offC_col, incC_row, incC_col, ldc); } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, double alpha, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, double *B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, double beta, double *C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) { return detail::ViennaCLHostgemm_impl(backend, orderA, transA, orderB, transB, orderC, m, n, k, alpha, A, offA_row, offA_col, incA_row, incA_col, lda, B, offB_row, offB_col, incB_row, incB_col, ldb, beta, C, offC_row, offC_col, 
incC_row, incC_col, ldc); } ViennaCL-1.5.1-src/libviennacl/src/blas3.hpp000644 001750 001750 00000004234 12267307531 020563 0ustar00rupprupp000000 000000 #ifndef VIENNACL_SRC_BLAS3_HPP #define VIENNACL_SRC_BLAS3_HPP /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/prod.hpp" namespace detail { template void gemm_dispatch(ScalarType alpha, MatrixTypeA const & A, ViennaCLTranspose transA, MatrixTypeB const & B, ViennaCLTranspose transB, ScalarType beta, MatrixTypeC & C) { if (transA == ViennaCLTrans && transB == ViennaCLTrans) viennacl::linalg::prod_impl(viennacl::trans(A), viennacl::trans(B), C, alpha, beta); else if (transA == ViennaCLTrans && transB == ViennaCLNoTrans) viennacl::linalg::prod_impl(viennacl::trans(A), B, C, alpha, beta); else if (transA == ViennaCLNoTrans && transB == ViennaCLTrans) viennacl::linalg::prod_impl(A, viennacl::trans(B), C, alpha, beta); else if (transA == ViennaCLNoTrans && transB == ViennaCLNoTrans) viennacl::linalg::prod_impl(A, B, C, alpha, beta); //else // return ViennaCLGenericFailure; } } #endif ViennaCL-1.5.1-src/libviennacl/src/blas2_cuda.cu000644 001750 001750 
00000031211 12267307531 021371 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/prod.hpp" #ifdef VIENNACL_WITH_CUDA // xGEMV VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASgemv(ViennaCLBackend /*backend*/, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, float alpha, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, float *x, ViennaCLInt offx, ViennaCLInt incx, float beta, float *y, ViennaCLInt offy, ViennaCLInt incy) { if (order == ViennaCLRowMajor) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::CUDA_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::CUDA_MEMORY, m, offA_row, incA_row, m, n, offA_col, incA_col, lda); v2 *= beta; if (transA == ViennaCLTrans) v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha * viennacl::linalg::prod(mat, v1); } else { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::CUDA_MEMORY, 
m, offy, incy); viennacl::matrix_base mat(A, viennacl::CUDA_MEMORY, m, offA_row, incA_row, lda, n, offA_col, incA_col, n); v2 *= beta; if (transA == ViennaCLTrans) v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha * viennacl::linalg::prod(mat, v1); } return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADgemv(ViennaCLBackend /*backend*/, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, double alpha, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, double *x, ViennaCLInt offx, ViennaCLInt incx, double beta, double *y, ViennaCLInt offy, ViennaCLInt incy) { if (order == ViennaCLRowMajor) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::CUDA_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::CUDA_MEMORY, m, offA_row, incA_row, m, n, offA_col, incA_col, lda); v2 *= beta; if (transA == ViennaCLTrans) v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha * viennacl::linalg::prod(mat, v1); } else { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::CUDA_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::CUDA_MEMORY, m, offA_row, incA_row, lda, n, offA_col, incA_col, n); v2 *= beta; if (transA == ViennaCLTrans) v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha * viennacl::linalg::prod(mat, v1); } return ViennaCLSuccess; } // xTRSV VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDAStrsv(ViennaCLBackend /*backend*/, ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLDiag diag, ViennaCLInt n, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, float *x, ViennaCLInt offx, ViennaCLInt incx) { if (order == ViennaCLRowMajor) { viennacl::vector_base v(x, 
viennacl::CUDA_MEMORY, n, offx, incx); viennacl::matrix_base mat(A, viennacl::CUDA_MEMORY, n, offA_row, incA_row, n, n, offA_col, incA_col, lda); if (transA == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); } } else { viennacl::vector_base v(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::matrix_base mat(A, viennacl::CUDA_MEMORY, n, offA_row, incA_row, lda, n, offA_col, incA_col, n); if (transA == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); } } return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADtrsv(ViennaCLBackend /*backend*/, ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLDiag diag, ViennaCLInt n, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, double *x, ViennaCLInt offx, ViennaCLInt incx) { if (order == ViennaCLRowMajor) { viennacl::vector_base v(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::matrix_base mat(A, viennacl::CUDA_MEMORY, n, offA_row, incA_row, n, n, offA_col, incA_col, lda); if (transA == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v, 
viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); } } else { viennacl::vector_base v(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::matrix_base mat(A, viennacl::CUDA_MEMORY, n, offA_row, incA_row, lda, n, offA_col, incA_col, n); if (transA == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); } } return ViennaCLSuccess; } // xGER VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASger(ViennaCLBackend /*backend*/, ViennaCLOrder order, ViennaCLInt m, ViennaCLInt n, float alpha, float *x, ViennaCLInt offx, ViennaCLInt incx, float *y, ViennaCLInt offy, ViennaCLInt incy, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda) { if (order == ViennaCLRowMajor) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::CUDA_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::CUDA_MEMORY, m, offA_row, incA_row, m, n, offA_col, incA_col, lda); mat += alpha * viennacl::linalg::outer_prod(v1, v2); } else { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::CUDA_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::CUDA_MEMORY, m, offA_row, incA_row, lda, n, offA_col, incA_col, n); mat += alpha * viennacl::linalg::outer_prod(v1, v2); } return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADger(ViennaCLBackend /*backend*/, ViennaCLOrder order, 
ViennaCLInt m, ViennaCLInt n, double alpha, double *x, ViennaCLInt offx, ViennaCLInt incx, double *y, ViennaCLInt offy, ViennaCLInt incy, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda) { if (order == ViennaCLRowMajor) { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::CUDA_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::CUDA_MEMORY, m, offA_row, incA_row, m, n, offA_col, incA_col, lda); mat += alpha * viennacl::linalg::outer_prod(v1, v2); } else { viennacl::vector_base v1(x, viennacl::CUDA_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::CUDA_MEMORY, m, offy, incy); viennacl::matrix_base mat(A, viennacl::CUDA_MEMORY, m, offA_row, incA_row, lda, n, offA_col, incA_col, n); mat += alpha * viennacl::linalg::outer_prod(v1, v2); } return ViennaCLSuccess; } #endif ViennaCL-1.5.1-src/libviennacl/src/blas3_opencl.cpp000644 001750 001750 00000035170 12267307531 022121 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" #include "blas3.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/prod.hpp" #ifdef VIENNACL_WITH_OPENCL // // xGEMV // namespace detail { template ViennaCLStatus ViennaCLOpenCLgemm_impl(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, NumericT alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, NumericT beta, cl_mem C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) { ViennaCLInt A_size1 = (transA == ViennaCLTrans) ? k : m; ViennaCLInt A_size2 = (transA == ViennaCLTrans) ? m : k; ViennaCLInt B_size1 = (transB == ViennaCLTrans) ? n : k; ViennaCLInt B_size2 = (transB == ViennaCLTrans) ? 
k : n; /////// A row-major if (orderA == ViennaCLRowMajor && orderB == ViennaCLRowMajor && orderC == ViennaCLRowMajor) { viennacl::matrix_base matA(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), A_size1, offA_row, incA_row, m, A_size2, offA_col, incA_col, lda); viennacl::matrix_base matB(B, viennacl::ocl::get_context(backend->opencl_backend.context_id), B_size1, offB_row, incB_row, k, B_size2, offB_col, incB_col, ldb); viennacl::matrix_base matC(C, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offC_row, incC_row, m, n, offC_col, incC_col, ldc); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLRowMajor && orderB == ViennaCLRowMajor && orderC == ViennaCLColumnMajor) { viennacl::matrix_base matA(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), A_size1, offA_row, incA_row, m, A_size2, offA_col, incA_col, lda); viennacl::matrix_base matB(B, viennacl::ocl::get_context(backend->opencl_backend.context_id), B_size1, offB_row, incB_row, k, B_size2, offB_col, incB_col, ldb); viennacl::matrix_base matC(C, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offC_row, incC_row, ldc, n, offC_col, incC_col, n); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLRowMajor && orderB == ViennaCLColumnMajor && orderC == ViennaCLRowMajor) { viennacl::matrix_base matA(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), A_size1, offA_row, incA_row, m, A_size2, offA_col, incA_col, lda); viennacl::matrix_base matB(B, viennacl::ocl::get_context(backend->opencl_backend.context_id), B_size1, offB_row, incB_row, ldb, B_size2, offB_col, incB_col, n); viennacl::matrix_base matC(C, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offC_row, incC_row, m, n, offC_col, incC_col, ldc); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLRowMajor && orderB 
== ViennaCLColumnMajor && orderC == ViennaCLColumnMajor) { viennacl::matrix_base matA(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), A_size1, offA_row, incA_row, m, A_size2, offA_col, incA_col, lda); viennacl::matrix_base matB(B, viennacl::ocl::get_context(backend->opencl_backend.context_id), B_size1, offB_row, incB_row, ldb, B_size2, offB_col, incB_col, n); viennacl::matrix_base matC(C, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offC_row, incC_row, ldc, n, offC_col, incC_col, n); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } /////// A column-major else if (orderA == ViennaCLColumnMajor && orderB == ViennaCLRowMajor && orderC == ViennaCLRowMajor) { viennacl::matrix_base matA(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), A_size1, offA_row, incA_row, lda, A_size2, offA_col, incA_col, k); viennacl::matrix_base matB(B, viennacl::ocl::get_context(backend->opencl_backend.context_id), B_size1, offB_row, incB_row, k, B_size2, offB_col, incB_col, ldb); viennacl::matrix_base matC(C, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offC_row, incC_row, m, n, offC_col, incC_col, ldc); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLColumnMajor && orderB == ViennaCLRowMajor && orderC == ViennaCLColumnMajor) { viennacl::matrix_base matA(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), A_size1, offA_row, incA_row, lda, A_size2, offA_col, incA_col, k); viennacl::matrix_base matB(B, viennacl::ocl::get_context(backend->opencl_backend.context_id), B_size1, offB_row, incB_row, k, B_size2, offB_col, incB_col, ldb); viennacl::matrix_base matC(C, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offC_row, incC_row, ldc, n, offC_col, incC_col, n); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLColumnMajor && orderB == ViennaCLColumnMajor && orderC 
== ViennaCLRowMajor) { viennacl::matrix_base matA(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), A_size1, offA_row, incA_row, lda, A_size2, offA_col, incA_col, k); viennacl::matrix_base matB(B, viennacl::ocl::get_context(backend->opencl_backend.context_id), B_size1, offB_row, incB_row, ldb, B_size2, offB_col, incB_col, n); viennacl::matrix_base matC(C, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offC_row, incC_row, m, n, offC_col, incC_col, ldc); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } else if (orderA == ViennaCLColumnMajor && orderB == ViennaCLColumnMajor && orderC == ViennaCLColumnMajor) { viennacl::matrix_base matA(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), A_size1, offA_row, incA_row, lda, A_size2, offA_col, incA_col, k); viennacl::matrix_base matB(B, viennacl::ocl::get_context(backend->opencl_backend.context_id), B_size1, offB_row, incB_row, ldb, B_size2, offB_col, incB_col, n); viennacl::matrix_base matC(C, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offC_row, incC_row, ldc, n, offC_col, incC_col, n); detail::gemm_dispatch(alpha, matA, transA, matB, transB, beta, matC); } return ViennaCLSuccess; } } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, float alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, float beta, cl_mem C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) { return detail::ViennaCLOpenCLgemm_impl(backend, orderA, transA, orderB, transB, orderC, m, n, k, alpha, A, offA_row, offA_col, 
incA_row, incA_col, lda, B, offB_row, offB_col, incB_row, incB_col, ldb, beta, C, offC_row, offC_col, incC_row, incC_col, ldc); } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDgemm(ViennaCLBackend backend, ViennaCLOrder orderA, ViennaCLTranspose transA, ViennaCLOrder orderB, ViennaCLTranspose transB, ViennaCLOrder orderC, ViennaCLInt m, ViennaCLInt n, ViennaCLInt k, double alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem B, ViennaCLInt offB_row, ViennaCLInt offB_col, ViennaCLInt incB_row, ViennaCLInt incB_col, ViennaCLInt ldb, double beta, cl_mem C, ViennaCLInt offC_row, ViennaCLInt offC_col, ViennaCLInt incC_row, ViennaCLInt incC_col, ViennaCLInt ldc) { return detail::ViennaCLOpenCLgemm_impl(backend, orderA, transA, orderB, transB, orderC, m, n, k, alpha, A, offA_row, offA_col, incA_row, incA_col, lda, B, offB_row, offB_col, incB_row, incB_col, ldb, beta, C, offC_row, offC_col, incC_row, incC_col, ldc); } #endif ViennaCL-1.5.1-src/libviennacl/src/blas1_host.cu000644 001750 001750 00000024256 12267307531 021444 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" //include the generic inner product functions of ViennaCL #include "viennacl/linalg/inner_prod.hpp" //include the generic norm functions of ViennaCL #include "viennacl/linalg/norm_1.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/norm_inf.hpp" // IxAMAX VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostiSamax(ViennaCLBackend /*backend*/, ViennaCLInt n, ViennaCLInt *index, float *x, ViennaCLInt offx, int incx) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); *index = static_cast(viennacl::linalg::index_norm_inf(v1)); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostiDamax(ViennaCLBackend /*backend*/, ViennaCLInt n, ViennaCLInt *index, double *x, ViennaCLInt offx, int incx) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); *index = static_cast(viennacl::linalg::index_norm_inf(v1)); return ViennaCLSuccess; } // xASUM VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSasum(ViennaCLBackend /*backend*/, ViennaCLInt n, float *alpha, float *x, ViennaCLInt offx, int incx) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); *alpha = viennacl::linalg::norm_1(v1); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDasum(ViennaCLBackend /*backend*/, ViennaCLInt n, double *alpha, double *x, ViennaCLInt offx, int incx) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); *alpha = 
viennacl::linalg::norm_1(v1); return ViennaCLSuccess; } // xAXPY VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSaxpy(ViennaCLBackend /*backend*/, ViennaCLInt n, float alpha, float *x, ViennaCLInt offx, int incx, float *y, ViennaCLInt offy, int incy) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, n, offy, incy); v2 += alpha * v1; return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDaxpy(ViennaCLBackend /*backend*/, ViennaCLInt n, double alpha, double *x, ViennaCLInt offx, int incx, double *y, ViennaCLInt offy, int incy) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, n, offy, incy); v2 += alpha * v1; return ViennaCLSuccess; } // xCOPY VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostScopy(ViennaCLBackend /*backend*/, ViennaCLInt n, float *x, ViennaCLInt offx, int incx, float *y, ViennaCLInt offy, int incy) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, n, offy, incy); v2 = v1; return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDcopy(ViennaCLBackend /*backend*/, ViennaCLInt n, double *x, ViennaCLInt offx, int incx, double *y, ViennaCLInt offy, int incy) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, n, offy, incy); v2 = v1; return ViennaCLSuccess; } // xAXPY VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSdot(ViennaCLBackend /*backend*/, ViennaCLInt n, float *alpha, float *x, ViennaCLInt offx, int incx, float *y, ViennaCLInt offy, int incy) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, n, offy, incy); *alpha = viennacl::linalg::inner_prod(v1, v2); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus 
ViennaCLHostDdot(ViennaCLBackend /*backend*/, ViennaCLInt n, double *alpha, double *x, ViennaCLInt offx, int incx, double *y, ViennaCLInt offy, int incy) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, n, offy, incy); *alpha = viennacl::linalg::inner_prod(v1, v2); return ViennaCLSuccess; } // xNRM2 VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSnrm2(ViennaCLBackend /*backend*/, ViennaCLInt n, float *alpha, float *x, ViennaCLInt offx, int incx) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); *alpha = viennacl::linalg::norm_2(v1); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDnrm2(ViennaCLBackend /*backend*/, ViennaCLInt n, double *alpha, double *x, ViennaCLInt offx, int incx) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); *alpha = viennacl::linalg::norm_2(v1); return ViennaCLSuccess; } // xROT VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSrot(ViennaCLBackend /*backend*/, ViennaCLInt n, float *x, ViennaCLInt offx, int incx, float *y, ViennaCLInt offy, int incy, float c, float s) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, n, offy, incy); viennacl::linalg::plane_rotation(v1, v2, c, s); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDrot(ViennaCLBackend /*backend*/, ViennaCLInt n, double *x, ViennaCLInt offx, int incx, double *y, ViennaCLInt offy, int incy, double c, double s) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, n, offy, incy); viennacl::linalg::plane_rotation(v1, v2, c, s); return ViennaCLSuccess; } // xSCAL VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSscal(ViennaCLBackend /*backend*/, ViennaCLInt n, float alpha, float *x, ViennaCLInt offx, int incx) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, 
offx, incx); v1 *= alpha; return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDscal(ViennaCLBackend /*backend*/, ViennaCLInt n, double alpha, double *x, ViennaCLInt offx, int incx) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); v1 *= alpha; return ViennaCLSuccess; } // xSWAP VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSswap(ViennaCLBackend /*backend*/, ViennaCLInt n, float *x, ViennaCLInt offx, int incx, float *y, ViennaCLInt offy, int incy) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, n, offy, incy); viennacl::swap(v1, v2); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDswap(ViennaCLBackend /*backend*/, ViennaCLInt n, double *x, ViennaCLInt offx, int incx, double *y, ViennaCLInt offy, int incy) { viennacl::vector_base v1(x, viennacl::MAIN_MEMORY, n, offx, incx); viennacl::vector_base v2(y, viennacl::MAIN_MEMORY, n, offy, incy); viennacl::swap(v1, v2); return ViennaCLSuccess; } ViennaCL-1.5.1-src/libviennacl/src/blas2_opencl.cpp000644 001750 001750 00000033404 12267307531 022116 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/prod.hpp" // xGEMV VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, float alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, float beta, cl_mem y, ViennaCLInt offy, ViennaCLInt incy) { if (order == ViennaCLRowMajor) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, m, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offA_row, incA_row, m, n, offA_col, incA_col, lda); v2 *= beta; if (transA == ViennaCLTrans) v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha * viennacl::linalg::prod(mat, v1); } else { viennacl::vector_base v1(x, n, offx, incx); viennacl::vector_base v2(y, m, offy, incy); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offA_row, incA_row, lda, n, offA_col, incA_col, n); v2 *= beta; if (transA == ViennaCLTrans) v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); 
else v2 += alpha * viennacl::linalg::prod(mat, v1); } return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, double alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, double beta, cl_mem y, ViennaCLInt offy, ViennaCLInt incy) { if (order == ViennaCLRowMajor) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, m, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offA_row, incA_row, m, n, offA_col, incA_col, lda); v2 *= beta; if (transA == ViennaCLTrans) v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha * viennacl::linalg::prod(mat, v1); } else { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, m, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offA_row, incA_row, lda, n, offA_col, incA_col, n); v2 *= beta; if (transA == ViennaCLTrans) v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha * viennacl::linalg::prod(mat, v1); } return ViennaCLSuccess; } // xTRSV VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLStrsv(ViennaCLBackend backend, ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt n, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem x, ViennaCLInt offx, ViennaCLInt incx) { if (order == ViennaCLRowMajor) { viennacl::vector_base v(x, n, 
offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), n, offA_row, incA_row, n, n, offA_col, incA_col, lda); if (transA == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); } } else { viennacl::vector_base v(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), n, offA_row, incA_row, lda, n, offA_col, incA_col, n); if (transA == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); } } return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDtrsv(ViennaCLBackend backend, ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt n, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem x, ViennaCLInt offx, ViennaCLInt incx) { if (order == ViennaCLRowMajor) { viennacl::vector_base v(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), n, offA_row, incA_row, n, n, offA_col, incA_col, lda); if 
(transA == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); } } else { viennacl::vector_base v(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), n, offA_row, incA_row, lda, n, offA_col, incA_col, n); if (transA == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag()); } } return ViennaCLSuccess; } // xGER VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSger(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLInt m, ViennaCLInt n, float alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda) { if (order == ViennaCLRowMajor) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, m, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offA_row, incA_row, m, n, offA_col, incA_col, lda); mat += alpha * viennacl::linalg::outer_prod(v1, v2); } else { viennacl::vector_base v1(x, 
n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, m, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offA_row, incA_row, lda, n, offA_col, incA_col, n); mat += alpha * viennacl::linalg::outer_prod(v1, v2); } return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDger(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLInt m, ViennaCLInt n, double alpha, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, cl_mem y, ViennaCLInt offy, ViennaCLInt incy, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda) { if (order == ViennaCLRowMajor) { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, m, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offA_row, incA_row, m, n, offA_col, incA_col, lda); mat += alpha * viennacl::linalg::outer_prod(v1, v2); } else { viennacl::vector_base v1(x, n, offx, incx, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::vector_base v2(y, m, offy, incy, viennacl::ocl::get_context(backend->opencl_backend.context_id)); viennacl::matrix_base mat(A, viennacl::ocl::get_context(backend->opencl_backend.context_id), m, offA_row, incA_row, lda, n, offA_col, incA_col, n); mat += alpha * viennacl::linalg::outer_prod(v1, v2); } return ViennaCLSuccess; } ViennaCL-1.5.1-src/libviennacl/src/blas2.cu000644 001750 001750 00000026310 12267307531 020401 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. 
Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" #include "init_vector.hpp" #include "init_matrix.hpp" //include basic scalar and vector types of ViennaCL #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/prod.hpp" // GEMV VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLgemv(ViennaCLHostScalar alpha, ViennaCLMatrix A, ViennaCLVector x, ViennaCLHostScalar beta, ViennaCLVector y) { viennacl::backend::mem_handle v1_handle; viennacl::backend::mem_handle v2_handle; viennacl::backend::mem_handle A_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; if (init_vector(v2_handle, y) != ViennaCLSuccess) return ViennaCLGenericFailure; if (init_matrix(A_handle, A) != ViennaCLSuccess) return ViennaCLGenericFailure; switch (x->precision) { case ViennaCLFloat: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc); if (A->order == ViennaCLRowMajor) { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); v2 *= beta->value_float; if (A->trans == ViennaCLTrans) v2 += alpha->value_float * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha->value_float * viennacl::linalg::prod(mat, v1); } else { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, 
A->internal_size2); v2 *= beta->value_float; if (A->trans == ViennaCLTrans) v2 += alpha->value_float * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha->value_float * viennacl::linalg::prod(mat, v1); } return ViennaCLSuccess; } case ViennaCLDouble: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc); if (A->order == ViennaCLRowMajor) { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); v2 *= beta->value_double; if (A->trans == ViennaCLTrans) v2 += alpha->value_double * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha->value_double * viennacl::linalg::prod(mat, v1); } else { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); v2 *= beta->value_double; if (A->trans == ViennaCLTrans) v2 += alpha->value_double * viennacl::linalg::prod(viennacl::trans(mat), v1); else v2 += alpha->value_double * viennacl::linalg::prod(mat, v1); } return ViennaCLSuccess; } default: return ViennaCLGenericFailure; } } // xTRSV VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLtrsv(ViennaCLMatrix A, ViennaCLVector x, ViennaCLUplo uplo) { viennacl::backend::mem_handle v1_handle; viennacl::backend::mem_handle A_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; if (init_matrix(A_handle, A) != ViennaCLSuccess) return ViennaCLGenericFailure; switch (x->precision) { case ViennaCLFloat: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); if (A->order == ViennaCLRowMajor) { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); if (A->trans == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v1, 
viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v1, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v1, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v1, viennacl::linalg::lower_tag()); } } else { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); if (A->trans == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v1, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v1, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v1, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v1, viennacl::linalg::lower_tag()); } } return ViennaCLSuccess; } case ViennaCLDouble: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); if (A->order == ViennaCLRowMajor) { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); if (A->trans == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v1, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v1, viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v1, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v1, viennacl::linalg::lower_tag()); } } else { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); if (A->trans == ViennaCLTrans) { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(viennacl::trans(mat), v1, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(viennacl::trans(mat), v1, 
viennacl::linalg::lower_tag()); } else { if (uplo == ViennaCLUpper) viennacl::linalg::inplace_solve(mat, v1, viennacl::linalg::upper_tag()); else viennacl::linalg::inplace_solve(mat, v1, viennacl::linalg::lower_tag()); } } return ViennaCLSuccess; } default: return ViennaCLGenericFailure; } } // xGER VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLger(ViennaCLHostScalar alpha, ViennaCLVector x, ViennaCLVector y, ViennaCLMatrix A) { viennacl::backend::mem_handle v1_handle; viennacl::backend::mem_handle v2_handle; viennacl::backend::mem_handle A_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; if (init_vector(v2_handle, y) != ViennaCLSuccess) return ViennaCLGenericFailure; if (init_matrix(A_handle, A) != ViennaCLSuccess) return ViennaCLGenericFailure; switch (x->precision) { case ViennaCLFloat: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc); if (A->order == ViennaCLRowMajor) { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); mat += alpha->value_float * viennacl::linalg::outer_prod(v1, v2); } else { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); mat += alpha->value_float * viennacl::linalg::outer_prod(v1, v2); } return ViennaCLSuccess; } case ViennaCLDouble: { viennacl::vector_base v1(v1_handle, x->size, x->offset, x->inc); viennacl::vector_base v2(v2_handle, y->size, y->offset, y->inc); if (A->order == ViennaCLRowMajor) { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, A->internal_size2); mat += alpha->value_double * viennacl::linalg::outer_prod(v1, v2); } else { viennacl::matrix_base mat(A_handle, A->size1, A->start1, A->stride1, A->internal_size1, A->size2, A->start2, A->stride2, 
A->internal_size2); mat += alpha->value_double * viennacl::linalg::outer_prod(v1, v2); } return ViennaCLSuccess; } default: return ViennaCLGenericFailure; } } ViennaCL-1.5.1-src/libviennacl/src/backend.cpp000644 001750 001750 00000002610 12267307531 021135 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendCreate(ViennaCLBackend * backend) { *backend = new ViennaCLBackend_impl(); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendSetOpenCLContextID(ViennaCLBackend backend, ViennaCLInt context_id) { backend->opencl_backend.context_id = context_id; return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendDestroy(ViennaCLBackend * backend) { delete *backend; *backend = NULL; return ViennaCLSuccess; } ViennaCL-1.5.1-src/libviennacl/src/backend.cu000644 001750 001750 00000002610 12267307531 020762 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // include necessary system headers #include #include "viennacl.hpp" #include "viennacl_private.hpp" VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendCreate(ViennaCLBackend * backend) { *backend = new ViennaCLBackend_impl(); return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendSetOpenCLContextID(ViennaCLBackend backend, ViennaCLInt context_id) { backend->opencl_backend.context_id = context_id; return ViennaCLSuccess; } VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendDestroy(ViennaCLBackend * backend) { delete *backend; *backend = NULL; return ViennaCLSuccess; } ViennaCL-1.5.1-src/libviennacl/src/init_vector.hpp000644 001750 001750 00000006011 12267307531 022077 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library -----------------
Project Head: Karl Rupp rupp@iue.tuwien.ac.at
(A list of authors and contributors can be found in the PDF manual)
License: MIT (X11), see file LICENSE in the base directory
============================================================================= */

#include "viennacl.hpp"
#include "viennacl/backend/mem_handle.hpp"

/** Points handle h at the CUDA buffer x->cuda_mem and sets its raw size from x->inc, x->size and
 *  the element size implied by x->precision.
 *  Returns ViennaCLGenericFailure for an unsupported precision, or always when built without
 *  VIENNACL_WITH_CUDA; ViennaCLSuccess otherwise.
 */
static ViennaCLStatus init_cuda_vector(viennacl::backend::mem_handle & h, ViennaCLVector x)
{
#ifdef VIENNACL_WITH_CUDA
  h.switch_active_handle_id(viennacl::CUDA_MEMORY);
  h.cuda_handle().reset(x->cuda_mem);
  h.cuda_handle().inc();
  if (x->precision == ViennaCLFloat)
    h.raw_size(x->inc * x->size * sizeof(float)); // not necessary, but still set for conciseness
  else if (x->precision == ViennaCLDouble)
    h.raw_size(x->inc * x->size * sizeof(double)); // not necessary, but still set for conciseness
  else
    return ViennaCLGenericFailure;

  return ViennaCLSuccess;
#else
  // Silence unused-parameter warnings in builds without CUDA support.
  (void)h;
  (void)x;
  return ViennaCLGenericFailure;
#endif
}

/** Points handle h at the OpenCL buffer x->opencl_mem and sets its raw size from x->inc, x->size
 *  and the element size implied by x->precision.
 *  Returns ViennaCLGenericFailure for an unsupported precision, or always when built without
 *  VIENNACL_WITH_OPENCL; ViennaCLSuccess otherwise.
 */
static ViennaCLStatus init_opencl_vector(viennacl::backend::mem_handle & h, ViennaCLVector x)
{
#ifdef VIENNACL_WITH_OPENCL
  h.switch_active_handle_id(viennacl::OPENCL_MEMORY);
  h.opencl_handle() = x->opencl_mem;
  h.opencl_handle().inc();
  if (x->precision == ViennaCLFloat)
    h.raw_size(x->inc * x->size * sizeof(float)); // not necessary, but still set for conciseness
  else if (x->precision == ViennaCLDouble)
    h.raw_size(x->inc * x->size * sizeof(double)); // not necessary, but still set for conciseness
  else
    return ViennaCLGenericFailure;

  return ViennaCLSuccess;
#else
  // Silence unused-parameter warnings in builds without OpenCL support.
  (void)h;
  (void)x;
  return ViennaCLGenericFailure;
#endif
}

/** Points handle h at the host buffer x->host_mem and sets its raw size from x->inc, x->size and
 *  the element size implied by x->precision.
 *  Returns ViennaCLGenericFailure for an unsupported precision, ViennaCLSuccess otherwise.
 */
static ViennaCLStatus init_host_vector(viennacl::backend::mem_handle & h, ViennaCLVector x)
{
  h.switch_active_handle_id(viennacl::MAIN_MEMORY);
  h.ram_handle().reset(x->host_mem);
  h.ram_handle().inc();
  if (x->precision == ViennaCLFloat)
    h.raw_size(x->inc * x->size * sizeof(float)); // not necessary, but still set for conciseness
  else if (x->precision == ViennaCLDouble)
    h.raw_size(x->inc * x->size * sizeof(double)); // not necessary, but still set for conciseness
  else
    return ViennaCLGenericFailure;

  return ViennaCLSuccess;
}

/** Dispatches on x->backend->backend_type to the CUDA, OpenCL or host initialiser above.
 *  Returns ViennaCLGenericFailure for any other backend type.
 */
static ViennaCLStatus init_vector(viennacl::backend::mem_handle & h, ViennaCLVector x)
{
  switch (x->backend->backend_type)
  {
    case ViennaCLCUDA:
      return init_cuda_vector(h, x);

    case ViennaCLOpenCL:
      return init_opencl_vector(h, x);

    case ViennaCLHost:
      return init_host_vector(h, x);

    default:
      return ViennaCLGenericFailure;
  }
}
ViennaCL-1.5.1-src/CMakeLists.txt000644 001750 001750 00000003774 12267307463 016545 0ustar00rupprupp000000 000000 # Project setup
###############

cmake_minimum_required(VERSION 2.8 FATAL_ERROR)

if(COMMAND cmake_policy)
  cmake_policy(SET CMP0003 NEW)
endif()

project(ViennaCL)

set(VIENNACL_SRC_DIST ON)

#
# User customizations if CMake does not find Boost or OpenCL
#

# Set boost path here if not found automatically by CMake
#SET(BOOST_ROOT "C:/Program\ Files\ (x86)/boost/boost_1_42")   # adjust this on Windows
#SET(BOOST_ROOT "/opt/local/include")                          # adjust this on MacOS or Linux

# For out-of-the-box support on MacOS:
if("${CMAKE_SYSTEM_NAME}" MATCHES "Darwin")
  include_directories("/opt/local/include")
  # Append rather than overwrite, so linker flags supplied by the user or a toolchain file survive.
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -framework OpenCL")
endif()


# Common options - There is usually no need to change anything below this line
################

set(VERSION_MAJOR 1)
set(VERSION_MINOR 5)
set(VERSION_PATCH 1)
set(VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH})

list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")

include(ViennaCLCommon)

# Set release build:
# project() already creates CMAKE_BUILD_TYPE as an (empty) cache entry, so a DEFINED test is always
# true and would never apply the default. Test for an empty value instead, and leave multi-config
# generators (which use CMAKE_CONFIGURATION_TYPES) alone.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE Release)
endif()


# User options
##############

option(BUILD_DOXYGEN_DOCS "Build the Doxygen-generated API docs" ON)
option(BUILD_MANUAL "Build the PDF manual" ON)

mark_as_advanced(BUILD_DOXYGEN_DOCS BUILD_MANUAL)


# Find prerequisites
####################
if(BUILD_DOXYGEN_DOCS)
  find_package(Doxygen REQUIRED)
endif()

if(BUILD_MANUAL)
  find_package(LATEX REQUIRED)
endif()


# Compile options
#################

include_directories(BEFORE ${PROJECT_SOURCE_DIR})


# Subdirectories
################

if(BUILD_EXAMPLES)
  add_subdirectory(examples)
endif()

if(BUILD_TESTING)
  add_subdirectory(tests)
endif()

add_subdirectory(doc)

add_subdirectory(libviennacl)


# Install
#########

install(
  DIRECTORY
    viennacl
    ${CMAKE_CURRENT_BINARY_DIR}/viennacl
  DESTINATION ${INSTALL_INCLUDE_DIR}
  COMPONENT dev
  FILES_MATCHING
    PATTERN "*.h"
    PATTERN "*.hpp")
ViennaCL-1.5.1-src/tests/000755 001750 001750 00000000000 12267307531 015130 5ustar00rupprupp000000 000000
ViennaCL-1.5.1-src/tests/CMakeLists.txt000644 001750 001750 00000012350 12267307531 017671 0ustar00rupprupp000000 000000
include_directories(${Boost_INCLUDE_DIRS})

# tests with CPU backend: one executable and one registered test per source file
foreach(cpu_test
    blas3_prod_float
    blas3_prod_double
    blas3_solve_float
    blas3_solve_double
    iterators
    global_variables
    matrix_vector
    matrix_vector_int
    matrix_row_float
    matrix_row_double
    matrix_row_int
    matrix_col_float
    matrix_col_double
    matrix_col_int
    scalar
    scheduler_matrix
    scheduler_matrix_matrix
    scheduler_matrix_vector
    scheduler_sparse
    scheduler_vector
    sparse
    vector_float
    vector_double
    vector_int
    vector_uint
    vector_multi_inner_prod
    spmdm)
  add_executable(${cpu_test}-test-cpu src/${cpu_test}.cpp)
  target_link_libraries(${cpu_test}-test-cpu ${Boost_LIBRARIES})
  add_test(${cpu_test}-cpu ${cpu_test}-test-cpu)
endforeach()


# tests with OpenCL backend: same sources, compiled with -DVIENNACL_WITH_OPENCL
if(ENABLE_OPENCL)
  foreach(ocl_test
      blas3_prod_float
      blas3_prod_double
      blas3_solve_float
      blas3_solve_double
      fft
      iterators
      generator_blas1
      generator_blas2
      generator_blas3
      #generator_segmentation
      global_variables
      matrix_vector
      matrix_vector_int
      matrix_row_float
      matrix_row_double
      matrix_row_int
      matrix_col_float
      matrix_col_double
      matrix_col_int
      nmf
      qr_method
      scalar
      sparse
      structured-matrices
      svd
      vector_float
      vector_double
      vector_int
      vector_uint
      vector_multi_inner_prod
      spmdm)
    add_executable(${ocl_test}-test-opencl src/${ocl_test}.cpp)
    set_target_properties(${ocl_test}-test-opencl PROPERTIES COMPILE_FLAGS "-DVIENNACL_WITH_OPENCL")
    target_link_libraries(${ocl_test}-test-opencl ${OPENCL_LIBRARIES} ${Boost_LIBRARIES})
    add_test(${ocl_test}-opencl ${ocl_test}-test-opencl)
  endforeach()

  # Linkage check: two translation units in one executable (built only, not registered as a test).
  include_directories(${PROJECT_SOURCE_DIR}/external)
  add_executable(external_linkage-opencl src/external_1.cpp src/external_2.cpp)
  set_target_properties(external_linkage-opencl PROPERTIES COMPILE_FLAGS "-DVIENNACL_WITH_OPENCL")
  target_link_libraries(external_linkage-opencl ${OPENCL_LIBRARIES} ${Boost_LIBRARIES})
endif()

# tests with CUDA backend: built from the .cu counterparts of the sources
if(ENABLE_CUDA)
  foreach(cuda_test
      blas3_prod_float
      blas3_prod_double
      blas3_solve_float
      blas3_solve_double
      iterators
      global_variables
      matrix_vector
      matrix_vector_int
      matrix_row_float
      matrix_row_double
      matrix_row_int
      matrix_col_float
      matrix_col_double
      matrix_col_int
      scalar
      sparse
      vector_float
      vector_double
      vector_int
      vector_uint
      vector_multi_inner_prod
      spmdm)
    cuda_add_executable(${cuda_test}-test-cuda src/${cuda_test}.cu)
    target_link_libraries(${cuda_test}-test-cuda ${Boost_LIBRARIES})
    add_test(${cuda_test}-cuda ${cuda_test}-test-cuda)
  endforeach()

  # Linkage check: two translation units in one executable (built only, not registered as a test).
  include_directories(${PROJECT_SOURCE_DIR}/external)
  cuda_add_executable(external_linkage-cuda src/external_1.cu src/external_2.cu)
  target_link_libraries(external_linkage-cuda ${Boost_LIBRARIES})
endif()

# test shared library
include_directories(${PROJECT_SOURCE_DIR}/libviennacl/include/)

if(ENABLE_CUDA AND ENABLE_OPENCL)
  # The NVCC flags have to be in place before the CUDA executables below are declared.
  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-DVIENNACL_WITH_OPENCL")
endif()

# One test driver per BLAS level, built from .cu with CUDA enabled and from .cpp otherwise.
foreach(blas_level 1 2 3)
  if(ENABLE_CUDA)
    cuda_add_executable(libviennacl_blas${blas_level}-test src/libviennacl_blas${blas_level}.cu)
  else()
    add_executable(libviennacl_blas${blas_level}-test src/libviennacl_blas${blas_level}.cpp)
    if(ENABLE_OPENCL)
      set_target_properties(libviennacl_blas${blas_level}-test PROPERTIES COMPILE_FLAGS "-DVIENNACL_WITH_OPENCL")
    endif()
  endif()

  if(ENABLE_OPENCL)
    target_link_libraries(libviennacl_blas${blas_level}-test viennacl ${OPENCL_LIBRARIES})
  else()
    target_link_libraries(libviennacl_blas${blas_level}-test viennacl)
  endif()

  add_test(libviennacl-blas${blas_level} libviennacl_blas${blas_level}-test)
endforeach()
ViennaCL-1.5.1-src/tests/src/000755 001750 001750 00000000000 12267307531 015717 5ustar00rupprupp000000 000000 ViennaCL-1.5.1-src/tests/src/blas3_prod_double.cpp000644 001750 001750 00000004367 12267307531 022017 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include "blas3_prod_float_double.hpp" // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: BLAS 3 routines" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-11; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << 
"----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/qr_method.cpp000644 001750 001750 00000020413 12267307531 020405 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /* Solutions for testdata were generated with Scilab line: M=fscanfMat('nsm1.example');e=spec(M);e=gsort(e);rr=real(e);ii=imag(e);e=cat(1, rr, ii); s=strcat(string(e), ' ');write('tmp', s); */ #ifndef NDEBUG #define NDEBUG #endif //#define VIENNACL_DEBUG_ALL #include #include #include #include #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/qr-method.hpp" #include #include #include namespace ublas = boost::numeric::ublas; typedef float ScalarType; const ScalarType EPS = 0.0001f; void read_matrix_size(std::fstream& f, std::size_t& sz) { if(!f.is_open()) { throw std::invalid_argument("File is not opened"); } f >> sz; } void read_matrix_body(std::fstream& f, viennacl::matrix& A) { if(!f.is_open()) { throw std::invalid_argument("File is not opened"); } boost::numeric::ublas::matrix h_A(A.size1(), A.size2()); for(std::size_t i = 0; i < h_A.size1(); i++) { for(std::size_t j = 0; j < h_A.size2(); j++) { ScalarType val = 0.0; f >> val; h_A(i, j) = val; } } viennacl::copy(h_A, A); } void read_vector_body(std::fstream& f, ublas::vector& v) { 
if(!f.is_open()) throw std::invalid_argument("File is not opened"); for(std::size_t i = 0; i < v.size(); i++) { ScalarType val = 0.0; f >> val; v[i] = val; } } bool check_tridiag(viennacl::matrix& A_orig) { ublas::matrix A(A_orig.size1(), A_orig.size2()); viennacl::copy(A_orig, A); for (unsigned int i = 0; i < A.size1(); i++) { for (unsigned int j = 0; j < A.size2(); j++) { if ((std::abs(A(i, j)) > EPS) && ((i - 1) != j) && (i != j) && ((i + 1) != j)) { // std::cout << "Failed at " << i << " " << j << " " << A(i, j) << "\n"; return false; } } } return true; } bool check_hessenberg(viennacl::matrix& A_orig) { ublas::matrix A(A_orig.size1(), A_orig.size2()); viennacl::copy(A_orig, A); for (std::size_t i = 0; i < A.size1(); i++) { for (std::size_t j = 0; j < A.size2(); j++) { if ((std::abs(A(i, j)) > EPS) && (i > (j + 1))) { // std::cout << "Failed at " << i << " " << j << " " << A(i, j) << "\n"; return false; } } } return true; } ScalarType matrix_compare(ublas::matrix& res, ublas::matrix& ref) { ScalarType diff = 0.0; ScalarType mx = 0.0; for(std::size_t i = 0; i < res.size1(); i++) { for(std::size_t j = 0; j < res.size2(); j++) { diff = std::max(diff, std::abs(res(i, j) - ref(i, j))); mx = std::max(mx, res(i, j)); } } return diff / mx; } ScalarType vector_compare(ublas::vector& res, ublas::vector& ref) { std::sort(ref.begin(), ref.end()); std::sort(res.begin(), res.end()); ScalarType diff = 0.0; ScalarType mx = 0.0; for(size_t i = 0; i < ref.size(); i++) { diff = std::max(diff, std::abs(res[i] - ref[i])); mx = std::max(mx, res[i]); } return diff / mx; } void test_eigen(const std::string& fn, bool is_symm) { std::cout << "Reading..." 
<< "\n"; std::size_t sz; // read file std::fstream f(fn.c_str(), std::fstream::in); //read size of input matrix read_matrix_size(f, sz); std::cout << "Testing matrix of size " << sz << "-by-" << sz << std::endl; viennacl::matrix A_input(sz, sz), A_ref(sz, sz), Q(sz, sz); ublas::vector eigen_ref_re = ublas::scalar_vector(sz, 0); ublas::vector eigen_ref_im = ublas::scalar_vector(sz, 0); ublas::vector eigen_re = ublas::scalar_vector(sz, 0); ublas::vector eigen_im = ublas::scalar_vector(sz, 0); read_matrix_body(f, A_input); read_vector_body(f, eigen_ref_re); if(!is_symm) read_vector_body(f, eigen_ref_im); f.close(); A_ref = A_input; std::cout << "Calculation..." << "\n"; Timer timer; timer.start(); if(is_symm) viennacl::linalg::qr_method_sym(A_input, Q, eigen_re); else viennacl::linalg::qr_method_nsm(A_input, Q, eigen_re, eigen_im); // std::cout << A_input << "\n"; viennacl::backend::finish(); double time_spend = timer.get(); std::cout << "Verification..." << "\n"; bool is_hessenberg = check_hessenberg(A_input); bool is_tridiag = check_tridiag(A_input); ublas::matrix A_ref_ublas(sz, sz), A_input_ublas(sz, sz), Q_ublas(sz, sz), result1(sz, sz), result2(sz, sz); viennacl::copy(A_ref, A_ref_ublas); viennacl::copy(A_input, A_input_ublas); viennacl::copy(Q, Q_ublas); // compute result1 = ublas::prod(Q_ublas, A_input_ublas); (terribly slow when using ublas directly) for (std::size_t i=0; i #include // // *** Boost // #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL #define VIENNACL_WITH_UBLAS 1 #include "viennacl/vector.hpp" #include "viennacl/vector_proxy.hpp" #include "viennacl/linalg/inner_prod.hpp" #include "viennacl/linalg/norm_1.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/norm_inf.hpp" #include "Random.hpp" using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType const & s1, ScalarType const & s2) { viennacl::backend::finish(); if (s1 
!= s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } // // ------------------------------------------------------------- // template ScalarType diff(ScalarType const & s1, viennacl::scalar const & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } // // ------------------------------------------------------------- // template ScalarType diff(ScalarType const & s1, viennacl::entry_proxy const & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } // // ------------------------------------------------------------- // template ScalarType diff(ublas::vector const & v1, ViennaCLVectorType const & vcl_vec) { ublas::vector v2_cpu(vcl_vec.size()); viennacl::backend::finish(); viennacl::copy(vcl_vec, v2_cpu); for (unsigned int i=0;i 0 ) v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); else v2_cpu[i] = 0.0; } return ublas::norm_inf(v2_cpu); } template ScalarType diff(ublas::vector_slice > const & v1, ViennaCLVectorType const & vcl_vec) { ublas::vector v2_cpu(vcl_vec.size()); viennacl::backend::finish(); viennacl::copy(vcl_vec, v2_cpu); for (unsigned int i=0;i 0 ) v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); else v2_cpu[i] = 0.0; } return ublas::norm_inf(v2_cpu); } template int check(T1 const & t1, T2 const & t2, double epsilon) { int retval = EXIT_SUCCESS; double temp = std::fabs(diff(t1, t2)); if (temp > epsilon) { std::cout << "# Error! 
Relative difference: " << temp << std::endl; retval = EXIT_FAILURE; } return retval; } // // ------------------------------------------------------------- // template< typename NumericT, typename Epsilon, typename UblasVectorType1, typename UblasVectorType2, typename UblasVectorType3, typename UblasVectorType4, typename ViennaCLVectorType1, typename ViennaCLVectorType2, typename ViennaCLVectorType3, typename ViennaCLVectorType4 > int test(Epsilon const& epsilon, UblasVectorType1 & ublas_v1, UblasVectorType2 & ublas_v2, UblasVectorType3 & ublas_v3, UblasVectorType4 & ublas_v4, ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2, ViennaCLVectorType3 & vcl_v3, ViennaCLVectorType4 & vcl_v4) { int retval = EXIT_SUCCESS; for (std::size_t i=0; i(); ublas_v2[i] = NumericT(1.0) + random(); ublas_v3[i] = NumericT(1.0) + random(); ublas_v4[i] = NumericT(1.0) + random(); } viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); //resync viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); viennacl::copy(ublas_v3.begin(), ublas_v3.end(), vcl_v3.begin()); viennacl::copy(ublas_v4.begin(), ublas_v4.end(), vcl_v4.begin()); std::cout << "Checking for successful copy..." << std::endl; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_v2, vcl_v2, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_v3, vcl_v3, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_v4, vcl_v4, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas::vector ref_result = ublas::scalar_vector(40, 0.0); viennacl::vector result = viennacl::scalar_vector(40, 0.0); std::cout << "Testing inner_prod with two vectors..." 
<< std::endl; ref_result(2) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(5) = ublas::inner_prod(ublas_v1, ublas_v2); viennacl::project(result, viennacl::slice(2, 3, 2)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v1, vcl_v2)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } ref_result(3) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(7) = ublas::inner_prod(ublas_v1, ublas_v4); viennacl::project(result, viennacl::slice(3, 4, 2)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v3, vcl_v4)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } std::cout << "Testing inner_prod with three vectors..." << std::endl; ref_result(1) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(3) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(5) = ublas::inner_prod(ublas_v1, ublas_v3); viennacl::project(result, viennacl::slice(1, 2, 3)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v1, vcl_v2, vcl_v3)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } ref_result(2) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(6) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(10) = ublas::inner_prod(ublas_v1, ublas_v4); viennacl::project(result, viennacl::slice(2, 4, 3)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v3, vcl_v2, vcl_v4)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } std::cout << "Testing inner_prod with four vectors..." 
<< std::endl; ref_result(4) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(5) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(6) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(7) = ublas::inner_prod(ublas_v1, ublas_v4); viennacl::project(result, viennacl::slice(4, 1, 4)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v1, vcl_v2, vcl_v3, vcl_v4)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } ref_result(3) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(6) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(9) = ublas::inner_prod(ublas_v1, ublas_v4); ref_result(12) = ublas::inner_prod(ublas_v1, ublas_v1); viennacl::project(result, viennacl::slice(3, 3, 4)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v3, vcl_v2, vcl_v4, vcl_v1)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } std::cout << "Testing inner_prod with five vectors..." 
<< std::endl; ref_result(1) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(3) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(5) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(7) = ublas::inner_prod(ublas_v1, ublas_v4); ref_result(9) = ublas::inner_prod(ublas_v1, ublas_v2); viennacl::project(result, viennacl::slice(1, 2, 5)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v1, vcl_v2, vcl_v3, vcl_v4, vcl_v2)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } ref_result(2) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(4) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(6) = ublas::inner_prod(ublas_v1, ublas_v4); ref_result(8) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(10) = ublas::inner_prod(ublas_v1, ublas_v2); viennacl::project(result, viennacl::slice(2, 2, 5)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v3, vcl_v2, vcl_v4, vcl_v1, vcl_v2)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } std::cout << "Testing inner_prod with eight vectors..." 
<< std::endl; ref_result(1) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(5) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(9) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(13) = ublas::inner_prod(ublas_v1, ublas_v4); ref_result(17) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(21) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(25) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(29) = ublas::inner_prod(ublas_v1, ublas_v2); std::vector const *> vecs1(8); vecs1[0] = &vcl_v1; vecs1[1] = &vcl_v2; vecs1[2] = &vcl_v3; vecs1[3] = &vcl_v4; vecs1[4] = &vcl_v3; vecs1[5] = &vcl_v2; vecs1[6] = &vcl_v1; vecs1[7] = &vcl_v2; viennacl::vector_tuple tuple1(vecs1); viennacl::project(result, viennacl::slice(1, 4, 8)) = viennacl::linalg::inner_prod(vcl_v1, tuple1); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } ref_result(3) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(5) = ublas::inner_prod(ublas_v1, ublas_v4); ref_result(7) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(9) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(11) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(13) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(15) = ublas::inner_prod(ublas_v1, ublas_v4); ref_result(17) = ublas::inner_prod(ublas_v1, ublas_v2); std::vector const *> vecs2(8); vecs2[0] = &vcl_v2; vecs2[1] = &vcl_v4; vecs2[2] = &vcl_v1; vecs2[3] = &vcl_v2; vecs2[4] = &vcl_v2; vecs2[5] = &vcl_v1; vecs2[6] = &vcl_v4; vecs2[7] = &vcl_v2; viennacl::vector_tuple tuple2(vecs2); viennacl::project(result, viennacl::slice(3, 2, 8)) = viennacl::linalg::inner_prod(vcl_v1, tuple2); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } // -------------------------------------------------------------------------- return retval; } template< typename NumericT, typename 
Epsilon > int test(Epsilon const& epsilon) { int retval = EXIT_SUCCESS; std::size_t size = 8 * 1337; std::cout << "Running tests for vector of size " << size << std::endl; // // Set up UBLAS objects // ublas::vector ublas_full_vec1(size); ublas::vector ublas_full_vec2(ublas_full_vec1.size()); for (std::size_t i=0; i(); ublas_full_vec2[i] = NumericT(1.0) + random(); } ublas::slice s1( ublas_full_vec1.size() / 8, 3, ublas_full_vec1.size() / 8); ublas::slice s2(2 * ublas_full_vec2.size() / 8, 1, ublas_full_vec2.size() / 8); ublas::slice s3(4 * ublas_full_vec1.size() / 8, 2, ublas_full_vec1.size() / 8); ublas::slice s4(3 * ublas_full_vec2.size() / 8, 4, ublas_full_vec2.size() / 8); ublas::vector_slice< ublas::vector > ublas_slice_vec1(ublas_full_vec1, s1); ublas::vector_slice< ublas::vector > ublas_slice_vec2(ublas_full_vec2, s2); ublas::vector_slice< ublas::vector > ublas_slice_vec3(ublas_full_vec1, s3); ublas::vector_slice< ublas::vector > ublas_slice_vec4(ublas_full_vec2, s4); // // Set up ViennaCL objects // viennacl::vector vcl_full_vec1(ublas_full_vec1.size()); viennacl::vector vcl_full_vec2(ublas_full_vec2.size()); viennacl::fast_copy(ublas_full_vec1.begin(), ublas_full_vec1.end(), vcl_full_vec1.begin()); viennacl::copy (ublas_full_vec2.begin(), ublas_full_vec2.end(), vcl_full_vec2.begin()); viennacl::slice vcl_s1( vcl_full_vec1.size() / 8, 3, vcl_full_vec1.size() / 8); viennacl::slice vcl_s2(2 * vcl_full_vec2.size() / 8, 1, vcl_full_vec2.size() / 8); viennacl::slice vcl_s3(4 * vcl_full_vec1.size() / 8, 2, vcl_full_vec1.size() / 8); viennacl::slice vcl_s4(3 * vcl_full_vec2.size() / 8, 4, vcl_full_vec2.size() / 8); viennacl::vector_slice< viennacl::vector > vcl_slice_vec1(vcl_full_vec1, vcl_s1); viennacl::vector_slice< viennacl::vector > vcl_slice_vec2(vcl_full_vec2, vcl_s2); viennacl::vector_slice< viennacl::vector > vcl_slice_vec3(vcl_full_vec1, vcl_s3); viennacl::vector_slice< viennacl::vector > vcl_slice_vec4(vcl_full_vec2, vcl_s4); viennacl::vector 
vcl_short_vec1(vcl_slice_vec1); viennacl::vector vcl_short_vec2 = vcl_slice_vec2; viennacl::vector vcl_short_vec3 = vcl_slice_vec2 + vcl_slice_vec1; viennacl::vector vcl_short_vec4 = vcl_short_vec1 + vcl_slice_vec2; ublas::vector ublas_short_vec1(ublas_slice_vec1); ublas::vector ublas_short_vec2(ublas_slice_vec2); ublas::vector ublas_short_vec3 = ublas_slice_vec2 + ublas_slice_vec1; ublas::vector ublas_short_vec4 = ublas_short_vec1 + ublas_slice_vec2; std::cout << "Testing creation of vectors from slice..." << std::endl; if (check(ublas_short_vec1, vcl_short_vec1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_short_vec2, vcl_short_vec2, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_short_vec3, vcl_short_vec3, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_short_vec4, vcl_short_vec4, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // Now start running tests for vectors, ranges and slices: // std::cout << " ** [vector|vector|vector|vector] **" << std::endl; retval = test(epsilon, ublas_short_vec1, ublas_short_vec2, ublas_short_vec2, ublas_short_vec2, vcl_short_vec1, vcl_short_vec2, vcl_short_vec3, vcl_short_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [vector|vector|vector|slice] **" << std::endl; retval = test(epsilon, ublas_short_vec1, ublas_short_vec2, ublas_short_vec2, ublas_slice_vec2, vcl_short_vec1, vcl_short_vec2, vcl_short_vec3, vcl_slice_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [vector|vector|slice|vector] **" << std::endl; retval = test(epsilon, ublas_short_vec1, ublas_short_vec2, ublas_slice_vec2, ublas_short_vec2, vcl_short_vec1, vcl_short_vec2, vcl_slice_vec3, vcl_short_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [vector|vector|slice|slice] **" << std::endl; retval = test(epsilon, ublas_short_vec1, ublas_short_vec2, ublas_slice_vec2, ublas_slice_vec2, vcl_short_vec1, vcl_short_vec2, vcl_slice_vec3, 
vcl_slice_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [vector|slice|vector|vector] **" << std::endl; retval = test(epsilon, ublas_short_vec1, ublas_slice_vec2, ublas_short_vec2, ublas_short_vec2, vcl_short_vec1, vcl_slice_vec2, vcl_short_vec3, vcl_short_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [vector|slice|vector|slice] **" << std::endl; retval = test(epsilon, ublas_short_vec1, ublas_slice_vec2, ublas_short_vec2, ublas_slice_vec2, vcl_short_vec1, vcl_slice_vec2, vcl_short_vec3, vcl_slice_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [vector|slice|slice|vector] **" << std::endl; retval = test(epsilon, ublas_short_vec1, ublas_slice_vec2, ublas_slice_vec2, ublas_short_vec2, vcl_short_vec1, vcl_slice_vec2, vcl_slice_vec3, vcl_short_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [vector|slice|slice|slice] **" << std::endl; retval = test(epsilon, ublas_short_vec1, ublas_slice_vec2, ublas_slice_vec2, ublas_slice_vec2, vcl_short_vec1, vcl_slice_vec2, vcl_slice_vec3, vcl_slice_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; ////////////////// std::cout << " ** [slice|vector|vector|vector] **" << std::endl; retval = test(epsilon, ublas_slice_vec1, ublas_short_vec2, ublas_short_vec2, ublas_short_vec2, vcl_slice_vec1, vcl_short_vec2, vcl_short_vec3, vcl_short_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [slice|vector|vector|slice] **" << std::endl; retval = test(epsilon, ublas_slice_vec1, ublas_short_vec2, ublas_short_vec2, ublas_slice_vec2, vcl_slice_vec1, vcl_short_vec2, vcl_short_vec3, vcl_slice_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [slice|vector|slice|vector] **" << std::endl; retval = test(epsilon, ublas_slice_vec1, ublas_short_vec2, ublas_slice_vec2, ublas_short_vec2, vcl_slice_vec1, vcl_short_vec2, vcl_slice_vec3, vcl_short_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; 
std::cout << " ** [slice|vector|slice|slice] **" << std::endl; retval = test(epsilon, ublas_slice_vec1, ublas_short_vec2, ublas_slice_vec2, ublas_slice_vec2, vcl_slice_vec1, vcl_short_vec2, vcl_slice_vec3, vcl_slice_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [slice|slice|vector|vector] **" << std::endl; retval = test(epsilon, ublas_slice_vec1, ublas_slice_vec2, ublas_short_vec2, ublas_short_vec2, vcl_slice_vec1, vcl_slice_vec2, vcl_short_vec3, vcl_short_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [slice|slice|vector|slice] **" << std::endl; retval = test(epsilon, ublas_slice_vec1, ublas_slice_vec2, ublas_short_vec2, ublas_slice_vec2, vcl_slice_vec1, vcl_slice_vec2, vcl_short_vec3, vcl_slice_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [slice|slice|slice|vector] **" << std::endl; retval = test(epsilon, ublas_slice_vec1, ublas_slice_vec2, ublas_slice_vec2, ublas_short_vec2, vcl_slice_vec1, vcl_slice_vec2, vcl_slice_vec3, vcl_short_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [slice|slice|slice|slice] **" << std::endl; retval = test(epsilon, ublas_slice_vec1, ublas_slice_vec2, ublas_slice_vec2, ublas_slice_vec2, vcl_slice_vec1, vcl_slice_vec2, vcl_slice_vec3, vcl_slice_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; return EXIT_SUCCESS; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Vector multiple inner products" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << 
"----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = static_cast(1.0E-4); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-12; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/global_variables.cu000644 001750 001750 00000004665 12267307531 021553 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // *** System // #include #include #include // // *** ViennaCL // #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/compressed_matrix.hpp" #include "viennacl/coordinate_matrix.hpp" #include "viennacl/ell_matrix.hpp" #include "viennacl/hyb_matrix.hpp" #ifdef VIENNACL_WITH_OPENCL #include "viennacl/circulant_matrix.hpp" #include "viennacl/hankel_matrix.hpp" #include "viennacl/toeplitz_matrix.hpp" #include "viennacl/vandermonde_matrix.hpp" #endif viennacl::scalar s1; viennacl::scalar s2; viennacl::vector v1; viennacl::vector v2; viennacl::matrix m1; //viennacl::matrix m2; // TODO: Add checks for other types // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Instantiation of global variables" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; s1 = viennacl::scalar(1.0f); s2 = viennacl::scalar(1); v1 = viennacl::vector(5); v2 = viennacl::vector(5); m1 = viennacl::matrix(5, 4); //m2 = viennacl::matrix(5, 4); std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return EXIT_SUCCESS; } // // ------------------------------------------------------------- // ViennaCL-1.5.1-src/tests/src/blas3_solve_double.cu000644 001750 001750 00000050035 12267307531 022021 
0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ //#define NDEBUG //#define VIENNACL_DEBUG_BUILD // // *** System // #include // // *** Boost // #include #include #include #include #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL //#define VIENNACL_DEBUG_BUILD #define VIENNACL_WITH_UBLAS 1 #include "viennacl/scalar.hpp" #include "viennacl/matrix.hpp" #include "viennacl/matrix_proxy.hpp" #include "viennacl/vector.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "examples/tutorial/Random.hpp" // // ------------------------------------------------------------- // using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType & s1, viennacl::scalar & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / std::max(fabs(s1), fabs(s2)); return 0; } template ScalarType diff(ublas::vector & v1, viennacl::vector & v2) { ublas::vector v2_cpu(v2.size()); viennacl::backend::finish(); viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin()); viennacl::backend::finish(); for (std::size_t i=0;i 0 ) v2_cpu[i] = fabs(v2_cpu[i] - v1[i]) / std::max( fabs(v2_cpu[i]), fabs(v1[i]) ); else v2_cpu[i] = 0.0; } return norm_inf(v2_cpu); } template ScalarType diff(ublas::matrix & mat1, VCLMatrixType & mat2) { ublas::matrix 
mat2_cpu(mat2.size1(), mat2.size2()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(mat2, mat2_cpu); ScalarType ret = 0; ScalarType act = 0; for (unsigned int i = 0; i < mat2_cpu.size1(); ++i) { for (unsigned int j = 0; j < mat2_cpu.size2(); ++j) { act = std::fabs(mat2_cpu(i,j) - mat1(i,j)) / std::max( std::fabs(mat2_cpu(i, j)), std::fabs(mat1(i,j)) ); if (act > ret) ret = act; } } //std::cout << ret << std::endl; return ret; } // // Triangular solvers // template void run_solver_check(RHSTypeRef & B_ref, RHSTypeCheck & B_check, int & retval, Epsilon const & epsilon) { double act_diff = fabs(diff(B_ref, B_check)); if( act_diff > epsilon ) { std::cout << " FAILED!" << std::endl; std::cout << "# Error at operation: matrix-matrix solve" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << " passed! " << act_diff << std::endl; } template< typename NumericT, typename Epsilon, typename ReferenceMatrixTypeA, typename ReferenceMatrixTypeB, typename ReferenceMatrixTypeC, typename MatrixTypeA, typename MatrixTypeB, typename MatrixTypeC, typename MatrixTypeResult> int test_solve(Epsilon const& epsilon, ReferenceMatrixTypeA const & A, ReferenceMatrixTypeB const & B_start, ReferenceMatrixTypeC const & C_start, MatrixTypeA const & vcl_A, MatrixTypeB & vcl_B, MatrixTypeC & vcl_C, MatrixTypeResult const & ) { int retval = EXIT_SUCCESS; // -------------------------------------------------------------------------- ReferenceMatrixTypeA result; ReferenceMatrixTypeC C_trans; ReferenceMatrixTypeB B = B_start; ReferenceMatrixTypeC C = C_start; MatrixTypeResult vcl_result; // Test: A \ B with various tags -------------------------------------------------------------------------- std::cout << "Testing A \\ B: " << std::endl; std::cout << " * upper_tag: "; result = ublas::solve(A, B, ublas::upper_tag()); vcl_result = viennacl::linalg::solve(vcl_A, vcl_B, 
viennacl::linalg::upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * unit_upper_tag: "; result = ublas::solve(A, B, ublas::unit_upper_tag()); vcl_result = viennacl::linalg::solve(vcl_A, vcl_B, viennacl::linalg::unit_upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * lower_tag: "; result = ublas::solve(A, B, ublas::lower_tag()); vcl_result = viennacl::linalg::solve(vcl_A, vcl_B, viennacl::linalg::lower_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * unit_lower_tag: "; result = ublas::solve(A, B, ublas::unit_lower_tag()); vcl_result = viennacl::linalg::solve(vcl_A, vcl_B, viennacl::linalg::unit_lower_tag()); run_solver_check(result, vcl_result, retval, epsilon); if (retval == EXIT_SUCCESS) std::cout << "Test A \\ B passed!" << std::endl; B = B_start; C = C_start; // Test: A \ B^T -------------------------------------------------------------------------- std::cout << "Testing A \\ B^T: " << std::endl; std::cout << " * upper_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); //check solve(): result = ublas::solve(A, C_trans, ublas::upper_tag()); vcl_result = viennacl::linalg::solve(vcl_A, trans(vcl_C), viennacl::linalg::upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); //check compute kernels: std::cout << " * upper_tag: "; ublas::inplace_solve(A, C_trans, ublas::upper_tag()); viennacl::linalg::inplace_solve(vcl_A, trans(vcl_C), viennacl::linalg::upper_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * unit_upper_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(A, C_trans, ublas::unit_upper_tag()); viennacl::linalg::inplace_solve(vcl_A, trans(vcl_C), viennacl::linalg::unit_upper_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * lower_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(A, C_trans, ublas::lower_tag()); 
viennacl::linalg::inplace_solve(vcl_A, trans(vcl_C), viennacl::linalg::lower_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * unit_lower_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(A, C_trans, ublas::unit_lower_tag()); viennacl::linalg::inplace_solve(vcl_A, trans(vcl_C), viennacl::linalg::unit_lower_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); if (retval == EXIT_SUCCESS) std::cout << "Test A \\ B^T passed!" << std::endl; B = B_start; C = C_start; // Test: A \ B with various tags -------------------------------------------------------------------------- std::cout << "Testing A^T \\ B: " << std::endl; std::cout << " * upper_tag: "; viennacl::copy(B, vcl_B); result = ublas::solve(trans(A), B, ublas::upper_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), vcl_B, viennacl::linalg::upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * unit_upper_tag: "; viennacl::copy(B, vcl_B); result = ublas::solve(trans(A), B, ublas::unit_upper_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), vcl_B, viennacl::linalg::unit_upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * lower_tag: "; viennacl::copy(B, vcl_B); result = ublas::solve(trans(A), B, ublas::lower_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), vcl_B, viennacl::linalg::lower_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * unit_lower_tag: "; viennacl::copy(B, vcl_B); result = ublas::solve(trans(A), B, ublas::unit_lower_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), vcl_B, viennacl::linalg::unit_lower_tag()); run_solver_check(result, vcl_result, retval, epsilon); if (retval == EXIT_SUCCESS) std::cout << "Test A^T \\ B passed!" 
<< std::endl; B = B_start; C = C_start; // Test: A^T \ B^T -------------------------------------------------------------------------- std::cout << "Testing A^T \\ B^T: " << std::endl; std::cout << " * upper_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); //check solve(): result = ublas::solve(trans(A), C_trans, ublas::upper_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); //check kernels: std::cout << " * upper_tag: "; ublas::inplace_solve(trans(A), C_trans, ublas::upper_tag()); viennacl::linalg::inplace_solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::upper_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * unit_upper_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(trans(A), C_trans, ublas::unit_upper_tag()); viennacl::linalg::inplace_solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::unit_upper_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * lower_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(trans(A), C_trans, ublas::lower_tag()); viennacl::linalg::inplace_solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::lower_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * unit_lower_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(trans(A), C_trans, ublas::unit_lower_tag()); viennacl::linalg::inplace_solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::unit_lower_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); if (retval == EXIT_SUCCESS) std::cout << "Test A^T \\ B^T passed!" 
<< std::endl; return retval; } template< typename NumericT, typename F_A, typename F_B, typename Epsilon > int test_solve(Epsilon const& epsilon) { int ret = EXIT_SUCCESS; long matrix_size = 135; //some odd number, not too large long rhs_num = 67; std::cout << "--- Part 2: Testing matrix-matrix solver ---" << std::endl; ublas::matrix A(matrix_size, matrix_size); ublas::matrix B_start(matrix_size, rhs_num); ublas::matrix C_start(rhs_num, matrix_size); for (std::size_t i = 0; i < A.size1(); ++i) { for (std::size_t j = 0; j < A.size2(); ++j) A(i,j) = static_cast(-0.5) * random(); A(i,i) = NumericT(1.0) + NumericT(2.0) * random(); //some extra weight on diagonal for stability } for (std::size_t i = 0; i < B_start.size1(); ++i) for (std::size_t j = 0; j < B_start.size2(); ++j) B_start(i,j) = random(); for (std::size_t i = 0; i < C_start.size1(); ++i) for (std::size_t j = 0; j < C_start.size2(); ++j) C_start(i,j) = random(); // A viennacl::range range1_A(matrix_size, 2*matrix_size); viennacl::range range2_A(2*matrix_size, 3*matrix_size); viennacl::slice slice1_A(matrix_size, 2, matrix_size); viennacl::slice slice2_A(0, 3, matrix_size); viennacl::matrix vcl_A(matrix_size, matrix_size); viennacl::copy(A, vcl_A); viennacl::matrix vcl_big_range_A(4*matrix_size, 4*matrix_size); viennacl::matrix_range > vcl_range_A(vcl_big_range_A, range1_A, range2_A); viennacl::copy(A, vcl_range_A); viennacl::matrix vcl_big_slice_A(4*matrix_size, 4*matrix_size); viennacl::matrix_slice > vcl_slice_A(vcl_big_slice_A, slice1_A, slice2_A); viennacl::copy(A, vcl_slice_A); // B viennacl::range range1_B(matrix_size, 2*matrix_size); viennacl::range range2_B(2*rhs_num, 3*rhs_num); viennacl::slice slice1_B(matrix_size, 2, matrix_size); viennacl::slice slice2_B(0, 3, rhs_num); viennacl::matrix vcl_B(matrix_size, rhs_num); viennacl::copy(B_start, vcl_B); viennacl::matrix vcl_big_range_B(4*matrix_size, 4*rhs_num); viennacl::matrix_range > vcl_range_B(vcl_big_range_B, range1_B, range2_B); 
viennacl::copy(B_start, vcl_range_B); viennacl::matrix vcl_big_slice_B(4*matrix_size, 4*rhs_num); viennacl::matrix_slice > vcl_slice_B(vcl_big_slice_B, slice1_B, slice2_B); viennacl::copy(B_start, vcl_slice_B); // C viennacl::range range1_C(rhs_num, 2*rhs_num); viennacl::range range2_C(2*matrix_size, 3*matrix_size); viennacl::slice slice1_C(rhs_num, 2, rhs_num); viennacl::slice slice2_C(0, 3, matrix_size); viennacl::matrix vcl_C(rhs_num, matrix_size); viennacl::copy(C_start, vcl_C); viennacl::matrix vcl_big_range_C(4*rhs_num, 4*matrix_size); viennacl::matrix_range > vcl_range_C(vcl_big_range_C, range1_C, range2_C); viennacl::copy(C_start, vcl_range_C); viennacl::matrix vcl_big_slice_C(4*rhs_num, 4*matrix_size); viennacl::matrix_slice > vcl_slice_C(vcl_big_slice_C, slice1_C, slice2_C); viennacl::copy(C_start, vcl_slice_C); std::cout << "Now using A=matrix, B=matrix" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_A, vcl_B, vcl_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=matrix, B=range" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_A, vcl_range_B, vcl_range_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=matrix, B=slice" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_A, vcl_slice_B, vcl_slice_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=range, B=matrix" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_range_A, vcl_B, vcl_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=range, B=range" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_range_A, vcl_range_B, vcl_range_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=range, B=slice" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_range_A, vcl_slice_B, vcl_slice_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=slice, B=matrix" << std::endl; ret = 
test_solve(epsilon, A, B_start, C_start, vcl_slice_A, vcl_B, vcl_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=slice, B=range" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_slice_A, vcl_range_B, vcl_range_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=slice, B=slice" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_slice_A, vcl_slice_B, vcl_slice_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; return ret; } // // Control functions // template< typename NumericT, typename Epsilon > int test(Epsilon const& epsilon) { int ret; std::cout << "////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=row ///" << std::endl; std::cout << "////////////////////////////////" << std::endl; ret = test_solve(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=col ///" << std::endl; std::cout << "////////////////////////////////" << std::endl; ret = test_solve(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=row ///" << std::endl; std::cout << "////////////////////////////////" << std::endl; ret = test_solve(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=col ///" << std::endl; std::cout << "////////////////////////////////" << std::endl; ret = test_solve(epsilon); if (ret != EXIT_SUCCESS) return ret; return ret; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: BLAS 3 routines" << std::endl; std::cout << 
"----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = NumericT(1.0E-3); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-11; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/iterators.cu000644 001750 001750 00000007045 12267307531 020272 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // *** System // #include #include // // *** ViennaCL // //#define VCL_BUILD_INFO //#define VIENNACL_WITH_UBLAS 1 #include "viennacl/matrix.hpp" #include "viennacl/vector.hpp" // // ------------------------------------------------------------- // template< typename NumericT > int test() { int retval = EXIT_SUCCESS; // -------------------------------------------------------------------------- typedef viennacl::vector VclVector; VclVector vcl_cont(3); vcl_cont[0] = 1; vcl_cont[1] = 2; vcl_cont[2] = 3; //typename VclVector::const_iterator const_iter_def_const; //typename VclVector::iterator iter_def_const; for(typename VclVector::const_iterator iter = vcl_cont.begin(); iter != vcl_cont.end(); iter++) { std::cout << *iter << std::endl; } for(typename VclVector::iterator iter = vcl_cont.begin(); iter != vcl_cont.end(); iter++) { std::cout << *iter << std::endl; } // -------------------------------------------------------------------------- return retval; } int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Iterators" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: float" << std::endl; retval = test(); 
if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: double" << std::endl; retval = test(); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/libviennacl_blas3.cpp000644 001750 001750 00000066505 12267307531 022011 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /* * * Testing the ViennaCL BLAS-like shared library * */ // include necessary system headers #include #include // Some helper functions for this tutorial: #include "viennacl.hpp" #include "examples/tutorial/Random.hpp" #include "viennacl/vector.hpp" template ScalarType diff(ScalarType const & s1, ScalarType const & s2) { if (s1 != s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } template ScalarType diff(std::vector const & v1, ViennaCLVectorType const & vcl_vec) { std::vector v2_cpu(vcl_vec.size()); viennacl::backend::finish(); viennacl::copy(vcl_vec, v2_cpu); ScalarType inf_norm = 0; for (unsigned int i=0;i 0 ) v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); else v2_cpu[i] = 0.0; if (v2_cpu[i] > inf_norm) inf_norm = v2_cpu[i]; } return inf_norm; } template void check(T const & t, U const & u, EpsilonT eps) { EpsilonT rel_error = diff(t,u); if (rel_error > eps) { std::cerr << "Relative error: " << rel_error << std::endl; std::cerr << "Aborting!" 
<< std::endl; exit(EXIT_FAILURE); } std::cout << "SUCCESS "; } template T get_value(std::vector & array, ViennaCLInt i, ViennaCLInt j, ViennaCLInt start1, ViennaCLInt start2, ViennaCLInt stride1, ViennaCLInt stride2, ViennaCLInt rows, ViennaCLInt cols, ViennaCLOrder order, ViennaCLTranspose trans) { // row-major if (order == ViennaCLRowMajor && trans == ViennaCLTrans) return array[(j*stride1 + start1) * cols + (i*stride2 + start2)]; else if (order == ViennaCLRowMajor && trans != ViennaCLTrans) return array[(i*stride1 + start1) * cols + (j*stride2 + start2)]; // column-major else if (order != ViennaCLRowMajor && trans == ViennaCLTrans) return array[(j*stride1 + start1) + (i*stride2 + start2) * rows]; return array[(i*stride1 + start1) + (j*stride2 + start2) * rows]; } void test_blas(ViennaCLBackend my_backend, float eps_float, double eps_double, std::vector & C_float, std::vector & C_double, std::vector & A_float, std::vector & A_double, std::vector & B_float, std::vector & B_double, ViennaCLOrder order_C, ViennaCLOrder order_A, ViennaCLOrder order_B, ViennaCLTranspose trans_A, ViennaCLTranspose trans_B, viennacl::vector & host_C_float, viennacl::vector & host_C_double, viennacl::vector & host_A_float, viennacl::vector & host_A_double, viennacl::vector & host_B_float, viennacl::vector & host_B_double #ifdef VIENNACL_WITH_CUDA , viennacl::vector & cuda_C_float, viennacl::vector & cuda_C_double , viennacl::vector & cuda_A_float, viennacl::vector & cuda_A_double , viennacl::vector & cuda_B_float, viennacl::vector & cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , viennacl::vector & opencl_C_float, viennacl::vector * opencl_C_double , viennacl::vector & opencl_A_float, viennacl::vector * opencl_A_double , viennacl::vector & opencl_B_float, viennacl::vector * opencl_B_double #endif ) { ViennaCLInt C_size1 = 42; ViennaCLInt C_size2 = 43; ViennaCLInt C_start1 = 10; ViennaCLInt C_start2 = 11; ViennaCLInt C_stride1 = 2; ViennaCLInt C_stride2 = 3; ViennaCLInt C_rows = 
C_size1 * C_stride1 + C_start1 + 5; ViennaCLInt C_columns = C_size2 * C_stride2 + C_start2 + 5; ViennaCLInt A_size1 = trans_A ? 44 : 42; ViennaCLInt A_size2 = trans_A ? 42 : 44; ViennaCLInt A_start1 = 12; ViennaCLInt A_start2 = 13; ViennaCLInt A_stride1 = 4; ViennaCLInt A_stride2 = 5; ViennaCLInt A_rows = A_size1 * A_stride1 + A_start1 + 5; ViennaCLInt A_columns = A_size2 * A_stride2 + A_start2 + 5; ViennaCLInt B_size1 = trans_B ? 43 : 44; ViennaCLInt B_size2 = trans_B ? 44 : 43; ViennaCLInt B_start1 = 14; ViennaCLInt B_start2 = 15; ViennaCLInt B_stride1 = 6; ViennaCLInt B_stride2 = 7; ViennaCLInt B_rows = B_size1 * B_stride1 + B_start1 + 5; ViennaCLInt B_columns = B_size2 * B_stride2 + B_start2 + 5; // Compute reference: ViennaCLInt size_k = trans_A ? A_size1 : A_size2; for (ViennaCLInt i=0; i(host_A_float), A_start1, A_start2, A_stride1, A_stride2, (order_A == ViennaCLRowMajor) ? A_columns : A_rows, viennacl::linalg::host_based::detail::extract_raw_pointer(host_B_float), B_start1, B_start2, B_stride1, B_stride2, (order_B == ViennaCLRowMajor) ? B_columns : B_rows, 0.0f, viennacl::linalg::host_based::detail::extract_raw_pointer(host_C_float), C_start1, C_start2, C_stride1, C_stride2, (order_C == ViennaCLRowMajor) ? C_columns : C_rows); check(C_float, host_C_float, eps_float); ViennaCLHostDgemm(my_backend, order_A, trans_A, order_B, trans_B, order_C, C_size1, C_size2, size_k, 1.0, viennacl::linalg::host_based::detail::extract_raw_pointer(host_A_double), A_start1, A_start2, A_stride1, A_stride2, (order_A == ViennaCLRowMajor) ? A_columns : A_rows, viennacl::linalg::host_based::detail::extract_raw_pointer(host_B_double), B_start1, B_start2, B_stride1, B_stride2, (order_B == ViennaCLRowMajor) ? B_columns : B_rows, 0.0, viennacl::linalg::host_based::detail::extract_raw_pointer(host_C_double), C_start1, C_start2, C_stride1, C_stride2, (order_C == ViennaCLRowMajor) ? 
C_columns : C_rows); check(C_double, host_C_double, eps_double); #ifdef VIENNACL_WITH_CUDA ViennaCLCUDASgemm(my_backend, order_A, trans_A, order_B, trans_B, order_C, C_size1, C_size2, size_k, 1.0f, viennacl::linalg::cuda::detail::cuda_arg(cuda_A_float), A_start1, A_start2, A_stride1, A_stride2, (order_A == ViennaCLRowMajor) ? A_columns : A_rows, viennacl::linalg::cuda::detail::cuda_arg(cuda_B_float), B_start1, B_start2, B_stride1, B_stride2, (order_B == ViennaCLRowMajor) ? B_columns : B_rows, 0.0f, viennacl::linalg::cuda::detail::cuda_arg(cuda_C_float), C_start1, C_start2, C_stride1, C_stride2, (order_C == ViennaCLRowMajor) ? C_columns : C_rows); check(C_float, cuda_C_float, eps_float); ViennaCLCUDADgemm(my_backend, order_A, trans_A, order_B, trans_B, order_C, C_size1, C_size2, size_k, 1.0, viennacl::linalg::cuda::detail::cuda_arg(cuda_A_double), A_start1, A_start2, A_stride1, A_stride2, (order_A == ViennaCLRowMajor) ? A_columns : A_rows, viennacl::linalg::cuda::detail::cuda_arg(cuda_B_double), B_start1, B_start2, B_stride1, B_stride2, (order_B == ViennaCLRowMajor) ? B_columns : B_rows, 0.0, viennacl::linalg::cuda::detail::cuda_arg(cuda_C_double), C_start1, C_start2, C_stride1, C_stride2, (order_C == ViennaCLRowMajor) ? C_columns : C_rows); check(C_double, cuda_C_double, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL ViennaCLOpenCLSgemm(my_backend, order_A, trans_A, order_B, trans_B, order_C, C_size1, C_size2, size_k, 1.0f, viennacl::traits::opencl_handle(opencl_A_float), A_start1, A_start2, A_stride1, A_stride2, (order_A == ViennaCLRowMajor) ? A_columns : A_rows, viennacl::traits::opencl_handle(opencl_B_float), B_start1, B_start2, B_stride1, B_stride2, (order_B == ViennaCLRowMajor) ? B_columns : B_rows, 0.0f, viennacl::traits::opencl_handle(opencl_C_float), C_start1, C_start2, C_stride1, C_stride2, (order_C == ViennaCLRowMajor) ? 
C_columns : C_rows); check(C_float, opencl_C_float, eps_float); if (opencl_A_double != NULL && opencl_B_double != NULL && opencl_C_double != NULL) { ViennaCLOpenCLDgemm(my_backend, order_A, trans_A, order_B, trans_B, order_C, C_size1, C_size2, size_k, 1.0, viennacl::traits::opencl_handle(*opencl_A_double), A_start1, A_start2, A_stride1, A_stride2, (order_A == ViennaCLRowMajor) ? A_columns : A_rows, viennacl::traits::opencl_handle(*opencl_B_double), B_start1, B_start2, B_stride1, B_stride2, (order_B == ViennaCLRowMajor) ? B_columns : B_rows, 0.0, viennacl::traits::opencl_handle(*opencl_C_double), C_start1, C_start2, C_stride1, C_stride2, (order_C == ViennaCLRowMajor) ? C_columns : C_rows); check(C_double, *opencl_C_double, eps_double); } #endif std::cout << std::endl; } void test_blas(ViennaCLBackend my_backend, float eps_float, double eps_double, std::vector & C_float, std::vector & C_double, std::vector & A_float, std::vector & A_double, std::vector & B_float, std::vector & B_double, ViennaCLOrder order_C, ViennaCLOrder order_A, ViennaCLOrder order_B, viennacl::vector & host_C_float, viennacl::vector & host_C_double, viennacl::vector & host_A_float, viennacl::vector & host_A_double, viennacl::vector & host_B_float, viennacl::vector & host_B_double #ifdef VIENNACL_WITH_CUDA , viennacl::vector & cuda_C_float, viennacl::vector & cuda_C_double , viennacl::vector & cuda_A_float, viennacl::vector & cuda_A_double , viennacl::vector & cuda_B_float, viennacl::vector & cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , viennacl::vector & opencl_C_float, viennacl::vector * opencl_C_double , viennacl::vector & opencl_A_float, viennacl::vector * opencl_A_double , viennacl::vector & opencl_B_float, viennacl::vector * opencl_B_double #endif ) { std::cout << " -> trans-trans: "; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, order_C, order_A, order_B, ViennaCLTrans, ViennaCLTrans, host_C_float, host_C_double, host_A_float, 
host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); std::cout << " -> trans-no: "; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, order_C, order_A, order_B, ViennaCLTrans, ViennaCLNoTrans, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); std::cout << " -> no-trans: "; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, order_C, order_A, order_B, ViennaCLNoTrans, ViennaCLTrans, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); std::cout << " -> no-no: "; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, order_C, order_A, order_B, ViennaCLNoTrans, ViennaCLNoTrans, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); } void test_blas(ViennaCLBackend my_backend, float eps_float, double eps_double, 
std::vector & C_float, std::vector & C_double, std::vector & A_float, std::vector & A_double, std::vector & B_float, std::vector & B_double, viennacl::vector & host_C_float, viennacl::vector & host_C_double, viennacl::vector & host_A_float, viennacl::vector & host_A_double, viennacl::vector & host_B_float, viennacl::vector & host_B_double #ifdef VIENNACL_WITH_CUDA , viennacl::vector & cuda_C_float, viennacl::vector & cuda_C_double , viennacl::vector & cuda_A_float, viennacl::vector & cuda_A_double , viennacl::vector & cuda_B_float, viennacl::vector & cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , viennacl::vector & opencl_C_float, viennacl::vector * opencl_C_double , viennacl::vector & opencl_A_float, viennacl::vector * opencl_A_double , viennacl::vector & opencl_B_float, viennacl::vector * opencl_B_double #endif ) { std::cout << " -> C: row, A: row, B: row" << std::endl; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, ViennaCLRowMajor, ViennaCLRowMajor, ViennaCLRowMajor, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); std::cout << " -> C: row, A: row, B: col" << std::endl; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, ViennaCLRowMajor, ViennaCLRowMajor, ViennaCLColumnMajor, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); std::cout << " -> C: row, A: col, B: row" << 
std::endl; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, ViennaCLRowMajor, ViennaCLColumnMajor, ViennaCLRowMajor, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); std::cout << " -> C: row, A: col, B: col" << std::endl; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, ViennaCLRowMajor, ViennaCLColumnMajor, ViennaCLColumnMajor, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); std::cout << " -> C: col, A: row, B: row" << std::endl; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, ViennaCLColumnMajor, ViennaCLRowMajor, ViennaCLRowMajor, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); std::cout << " -> C: col, A: row, B: col" << std::endl; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, ViennaCLColumnMajor, ViennaCLRowMajor, ViennaCLColumnMajor, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, 
cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); std::cout << " -> C: col, A: col, B: row" << std::endl; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, ViennaCLColumnMajor, ViennaCLColumnMajor, ViennaCLRowMajor, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); std::cout << " -> C: col, A: col, B: col" << std::endl; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, ViennaCLColumnMajor, ViennaCLColumnMajor, ViennaCLColumnMajor, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); } int main() { ViennaCLInt size = 500*500; float eps_float = 1e-5f; double eps_double = 1e-12; std::vector C_float(size); std::vector A_float(size); std::vector B_float(size); std::vector C_double(size); std::vector A_double(size); std::vector B_double(size); // fill with random data: for (ViennaCLInt i = 0; i < size; ++i) { C_float[i] = 0.5f + 0.1f * random(); A_float[i] = 0.5f + 0.1f * random(); B_float[i] = 0.5f + 0.1f * random(); C_double[i] = 0.5 + 0.2 * random(); A_double[i] = 0.5 + 0.2 * random(); B_double[i] = 0.5 + 0.2 * random(); } // Host setup ViennaCLBackend my_backend; ViennaCLBackendCreate(&my_backend); viennacl::vector host_C_float(size, 
viennacl::context(viennacl::MAIN_MEMORY)); viennacl::copy(C_float, host_C_float); viennacl::vector host_A_float(size, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::copy(A_float, host_A_float); viennacl::vector host_B_float(size, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::copy(B_float, host_B_float); viennacl::vector host_C_double(size, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::copy(C_double, host_C_double); viennacl::vector host_A_double(size, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::copy(A_double, host_A_double); viennacl::vector host_B_double(size, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::copy(B_double, host_B_double); // CUDA setup #ifdef VIENNACL_WITH_CUDA viennacl::vector cuda_C_float(size, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::copy(C_float, cuda_C_float); viennacl::vector cuda_A_float(size, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::copy(A_float, cuda_A_float); viennacl::vector cuda_B_float(size, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::copy(B_float, cuda_B_float); viennacl::vector cuda_C_double(size, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::copy(C_double, cuda_C_double); viennacl::vector cuda_A_double(size, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::copy(A_double, cuda_A_double); viennacl::vector cuda_B_double(size, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::copy(B_double, cuda_B_double); #endif // OpenCL setup #ifdef VIENNACL_WITH_OPENCL ViennaCLInt context_id = 0; viennacl::vector opencl_C_float(size, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::copy(C_float, opencl_C_float); viennacl::vector opencl_A_float(size, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::copy(A_float, opencl_A_float); viennacl::vector opencl_B_float(size, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::copy(B_float, opencl_B_float); viennacl::vector *opencl_C_double = NULL; viennacl::vector 
*opencl_A_double = NULL; viennacl::vector *opencl_B_double = NULL; if( viennacl::ocl::current_device().double_support() ) { opencl_C_double = new viennacl::vector(size, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::copy(C_double, *opencl_C_double); opencl_A_double = new viennacl::vector(size, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::copy(A_double, *opencl_A_double); opencl_B_double = new viennacl::vector(size, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::copy(B_double, *opencl_B_double); } ViennaCLBackendSetOpenCLContextID(my_backend, context_id); #endif // consistency checks: check(C_float, host_C_float, eps_float); check(A_float, host_A_float, eps_float); check(B_float, host_B_float, eps_float); check(C_double, host_C_double, eps_double); check(A_double, host_A_double, eps_double); check(B_double, host_B_double, eps_double); #ifdef VIENNACL_WITH_CUDA check(C_float, cuda_C_float, eps_float); check(A_float, cuda_A_float, eps_float); check(B_float, cuda_B_float, eps_float); check(C_double, cuda_C_double, eps_double); check(A_double, cuda_A_double, eps_double); check(B_double, cuda_B_double, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL check(C_float, opencl_C_float, eps_float); check(A_float, opencl_A_float, eps_float); check(B_float, opencl_B_float, eps_float); if( viennacl::ocl::current_device().double_support() ) { check(C_double, *opencl_C_double, eps_double); check(A_double, *opencl_A_double, eps_double); check(B_double, *opencl_B_double, eps_double); } #endif std::cout << std::endl; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double , cuda_A_float, cuda_A_double , cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double , opencl_A_float, opencl_A_double 
, opencl_B_float, opencl_B_double #endif ); #ifdef VIENNACL_WITH_OPENCL //cleanup if( viennacl::ocl::current_device().double_support() ) { delete opencl_C_double; delete opencl_A_double; delete opencl_B_double; } #endif ViennaCLBackendDestroy(&my_backend); // // That's it. // std::cout << std::endl << "!!!! TEST COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/blas3_solve_float.cu000644 001750 001750 00000050035 12267307531 021654 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ //#define NDEBUG //#define VIENNACL_DEBUG_BUILD // // *** System // #include // // *** Boost // #include #include #include #include #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL //#define VIENNACL_DEBUG_BUILD #define VIENNACL_WITH_UBLAS 1 #include "viennacl/scalar.hpp" #include "viennacl/matrix.hpp" #include "viennacl/matrix_proxy.hpp" #include "viennacl/vector.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "examples/tutorial/Random.hpp" // // ------------------------------------------------------------- // using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType & s1, viennacl::scalar & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / std::max(fabs(s1), fabs(s2)); return 
0; } template ScalarType diff(ublas::vector & v1, viennacl::vector & v2) { ublas::vector v2_cpu(v2.size()); viennacl::backend::finish(); viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin()); viennacl::backend::finish(); for (std::size_t i=0;i 0 ) v2_cpu[i] = fabs(v2_cpu[i] - v1[i]) / std::max( fabs(v2_cpu[i]), fabs(v1[i]) ); else v2_cpu[i] = 0.0; } return norm_inf(v2_cpu); } template ScalarType diff(ublas::matrix & mat1, VCLMatrixType & mat2) { ublas::matrix mat2_cpu(mat2.size1(), mat2.size2()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(mat2, mat2_cpu); ScalarType ret = 0; ScalarType act = 0; for (unsigned int i = 0; i < mat2_cpu.size1(); ++i) { for (unsigned int j = 0; j < mat2_cpu.size2(); ++j) { act = std::fabs(mat2_cpu(i,j) - mat1(i,j)) / std::max( std::fabs(mat2_cpu(i, j)), std::fabs(mat1(i,j)) ); if (act > ret) ret = act; } } //std::cout << ret << std::endl; return ret; } // // Triangular solvers // template void run_solver_check(RHSTypeRef & B_ref, RHSTypeCheck & B_check, int & retval, Epsilon const & epsilon) { double act_diff = fabs(diff(B_ref, B_check)); if( act_diff > epsilon ) { std::cout << " FAILED!" << std::endl; std::cout << "# Error at operation: matrix-matrix solve" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << " passed! 
" << act_diff << std::endl; } template< typename NumericT, typename Epsilon, typename ReferenceMatrixTypeA, typename ReferenceMatrixTypeB, typename ReferenceMatrixTypeC, typename MatrixTypeA, typename MatrixTypeB, typename MatrixTypeC, typename MatrixTypeResult> int test_solve(Epsilon const& epsilon, ReferenceMatrixTypeA const & A, ReferenceMatrixTypeB const & B_start, ReferenceMatrixTypeC const & C_start, MatrixTypeA const & vcl_A, MatrixTypeB & vcl_B, MatrixTypeC & vcl_C, MatrixTypeResult const & ) { int retval = EXIT_SUCCESS; // -------------------------------------------------------------------------- ReferenceMatrixTypeA result; ReferenceMatrixTypeC C_trans; ReferenceMatrixTypeB B = B_start; ReferenceMatrixTypeC C = C_start; MatrixTypeResult vcl_result; // Test: A \ B with various tags -------------------------------------------------------------------------- std::cout << "Testing A \\ B: " << std::endl; std::cout << " * upper_tag: "; result = ublas::solve(A, B, ublas::upper_tag()); vcl_result = viennacl::linalg::solve(vcl_A, vcl_B, viennacl::linalg::upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * unit_upper_tag: "; result = ublas::solve(A, B, ublas::unit_upper_tag()); vcl_result = viennacl::linalg::solve(vcl_A, vcl_B, viennacl::linalg::unit_upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * lower_tag: "; result = ublas::solve(A, B, ublas::lower_tag()); vcl_result = viennacl::linalg::solve(vcl_A, vcl_B, viennacl::linalg::lower_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * unit_lower_tag: "; result = ublas::solve(A, B, ublas::unit_lower_tag()); vcl_result = viennacl::linalg::solve(vcl_A, vcl_B, viennacl::linalg::unit_lower_tag()); run_solver_check(result, vcl_result, retval, epsilon); if (retval == EXIT_SUCCESS) std::cout << "Test A \\ B passed!" 
<< std::endl; B = B_start; C = C_start; // Test: A \ B^T -------------------------------------------------------------------------- std::cout << "Testing A \\ B^T: " << std::endl; std::cout << " * upper_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); //check solve(): result = ublas::solve(A, C_trans, ublas::upper_tag()); vcl_result = viennacl::linalg::solve(vcl_A, trans(vcl_C), viennacl::linalg::upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); //check compute kernels: std::cout << " * upper_tag: "; ublas::inplace_solve(A, C_trans, ublas::upper_tag()); viennacl::linalg::inplace_solve(vcl_A, trans(vcl_C), viennacl::linalg::upper_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * unit_upper_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(A, C_trans, ublas::unit_upper_tag()); viennacl::linalg::inplace_solve(vcl_A, trans(vcl_C), viennacl::linalg::unit_upper_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * lower_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(A, C_trans, ublas::lower_tag()); viennacl::linalg::inplace_solve(vcl_A, trans(vcl_C), viennacl::linalg::lower_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * unit_lower_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(A, C_trans, ublas::unit_lower_tag()); viennacl::linalg::inplace_solve(vcl_A, trans(vcl_C), viennacl::linalg::unit_lower_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); if (retval == EXIT_SUCCESS) std::cout << "Test A \\ B^T passed!" 
<< std::endl; B = B_start; C = C_start; // Test: A \ B with various tags -------------------------------------------------------------------------- std::cout << "Testing A^T \\ B: " << std::endl; std::cout << " * upper_tag: "; viennacl::copy(B, vcl_B); result = ublas::solve(trans(A), B, ublas::upper_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), vcl_B, viennacl::linalg::upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * unit_upper_tag: "; viennacl::copy(B, vcl_B); result = ublas::solve(trans(A), B, ublas::unit_upper_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), vcl_B, viennacl::linalg::unit_upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * lower_tag: "; viennacl::copy(B, vcl_B); result = ublas::solve(trans(A), B, ublas::lower_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), vcl_B, viennacl::linalg::lower_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * unit_lower_tag: "; viennacl::copy(B, vcl_B); result = ublas::solve(trans(A), B, ublas::unit_lower_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), vcl_B, viennacl::linalg::unit_lower_tag()); run_solver_check(result, vcl_result, retval, epsilon); if (retval == EXIT_SUCCESS) std::cout << "Test A^T \\ B passed!" 
<< std::endl; B = B_start; C = C_start; // Test: A^T \ B^T -------------------------------------------------------------------------- std::cout << "Testing A^T \\ B^T: " << std::endl; std::cout << " * upper_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); //check solve(): result = ublas::solve(trans(A), C_trans, ublas::upper_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); //check kernels: std::cout << " * upper_tag: "; ublas::inplace_solve(trans(A), C_trans, ublas::upper_tag()); viennacl::linalg::inplace_solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::upper_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * unit_upper_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(trans(A), C_trans, ublas::unit_upper_tag()); viennacl::linalg::inplace_solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::unit_upper_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * lower_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(trans(A), C_trans, ublas::lower_tag()); viennacl::linalg::inplace_solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::lower_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * unit_lower_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(trans(A), C_trans, ublas::unit_lower_tag()); viennacl::linalg::inplace_solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::unit_lower_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); if (retval == EXIT_SUCCESS) std::cout << "Test A^T \\ B^T passed!" 
<< std::endl; return retval; } template< typename NumericT, typename F_A, typename F_B, typename Epsilon > int test_solve(Epsilon const& epsilon) { int ret = EXIT_SUCCESS; long matrix_size = 135; //some odd number, not too large long rhs_num = 67; std::cout << "--- Part 2: Testing matrix-matrix solver ---" << std::endl; ublas::matrix A(matrix_size, matrix_size); ublas::matrix B_start(matrix_size, rhs_num); ublas::matrix C_start(rhs_num, matrix_size); for (std::size_t i = 0; i < A.size1(); ++i) { for (std::size_t j = 0; j < A.size2(); ++j) A(i,j) = static_cast(-0.5) * random(); A(i,i) = NumericT(1.0) + NumericT(2.0) * random(); //some extra weight on diagonal for stability } for (std::size_t i = 0; i < B_start.size1(); ++i) for (std::size_t j = 0; j < B_start.size2(); ++j) B_start(i,j) = random(); for (std::size_t i = 0; i < C_start.size1(); ++i) for (std::size_t j = 0; j < C_start.size2(); ++j) C_start(i,j) = random(); // A viennacl::range range1_A(matrix_size, 2*matrix_size); viennacl::range range2_A(2*matrix_size, 3*matrix_size); viennacl::slice slice1_A(matrix_size, 2, matrix_size); viennacl::slice slice2_A(0, 3, matrix_size); viennacl::matrix vcl_A(matrix_size, matrix_size); viennacl::copy(A, vcl_A); viennacl::matrix vcl_big_range_A(4*matrix_size, 4*matrix_size); viennacl::matrix_range > vcl_range_A(vcl_big_range_A, range1_A, range2_A); viennacl::copy(A, vcl_range_A); viennacl::matrix vcl_big_slice_A(4*matrix_size, 4*matrix_size); viennacl::matrix_slice > vcl_slice_A(vcl_big_slice_A, slice1_A, slice2_A); viennacl::copy(A, vcl_slice_A); // B viennacl::range range1_B(matrix_size, 2*matrix_size); viennacl::range range2_B(2*rhs_num, 3*rhs_num); viennacl::slice slice1_B(matrix_size, 2, matrix_size); viennacl::slice slice2_B(0, 3, rhs_num); viennacl::matrix vcl_B(matrix_size, rhs_num); viennacl::copy(B_start, vcl_B); viennacl::matrix vcl_big_range_B(4*matrix_size, 4*rhs_num); viennacl::matrix_range > vcl_range_B(vcl_big_range_B, range1_B, range2_B); 
viennacl::copy(B_start, vcl_range_B); viennacl::matrix vcl_big_slice_B(4*matrix_size, 4*rhs_num); viennacl::matrix_slice > vcl_slice_B(vcl_big_slice_B, slice1_B, slice2_B); viennacl::copy(B_start, vcl_slice_B); // C viennacl::range range1_C(rhs_num, 2*rhs_num); viennacl::range range2_C(2*matrix_size, 3*matrix_size); viennacl::slice slice1_C(rhs_num, 2, rhs_num); viennacl::slice slice2_C(0, 3, matrix_size); viennacl::matrix vcl_C(rhs_num, matrix_size); viennacl::copy(C_start, vcl_C); viennacl::matrix vcl_big_range_C(4*rhs_num, 4*matrix_size); viennacl::matrix_range > vcl_range_C(vcl_big_range_C, range1_C, range2_C); viennacl::copy(C_start, vcl_range_C); viennacl::matrix vcl_big_slice_C(4*rhs_num, 4*matrix_size); viennacl::matrix_slice > vcl_slice_C(vcl_big_slice_C, slice1_C, slice2_C); viennacl::copy(C_start, vcl_slice_C); std::cout << "Now using A=matrix, B=matrix" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_A, vcl_B, vcl_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=matrix, B=range" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_A, vcl_range_B, vcl_range_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=matrix, B=slice" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_A, vcl_slice_B, vcl_slice_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=range, B=matrix" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_range_A, vcl_B, vcl_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=range, B=range" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_range_A, vcl_range_B, vcl_range_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=range, B=slice" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_range_A, vcl_slice_B, vcl_slice_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=slice, B=matrix" << std::endl; ret = 
test_solve(epsilon, A, B_start, C_start, vcl_slice_A, vcl_B, vcl_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=slice, B=range" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_slice_A, vcl_range_B, vcl_range_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=slice, B=slice" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_slice_A, vcl_slice_B, vcl_slice_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; return ret; } // // Control functions // template< typename NumericT, typename Epsilon > int test(Epsilon const& epsilon) { int ret; std::cout << "////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=row ///" << std::endl; std::cout << "////////////////////////////////" << std::endl; ret = test_solve(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=col ///" << std::endl; std::cout << "////////////////////////////////" << std::endl; ret = test_solve(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=row ///" << std::endl; std::cout << "////////////////////////////////" << std::endl; ret = test_solve(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=col ///" << std::endl; std::cout << "////////////////////////////////" << std::endl; ret = test_solve(epsilon); if (ret != EXIT_SUCCESS) return ret; return ret; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: BLAS 3 routines" << std::endl; std::cout << 
"----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = NumericT(1.0E-3); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-11; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/blas3_solve_double.cpp000644 001750 001750 00000050035 12267307531 022174 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ //#define NDEBUG //#define VIENNACL_DEBUG_BUILD // // *** System // #include // // *** Boost // #include #include #include #include #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL //#define VIENNACL_DEBUG_BUILD #define VIENNACL_WITH_UBLAS 1 #include "viennacl/scalar.hpp" #include "viennacl/matrix.hpp" #include "viennacl/matrix_proxy.hpp" #include "viennacl/vector.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "examples/tutorial/Random.hpp" // // ------------------------------------------------------------- // using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType & s1, viennacl::scalar & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / std::max(fabs(s1), fabs(s2)); return 0; } template ScalarType diff(ublas::vector & v1, viennacl::vector & v2) { ublas::vector v2_cpu(v2.size()); viennacl::backend::finish(); viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin()); viennacl::backend::finish(); for (std::size_t i=0;i 0 ) v2_cpu[i] = fabs(v2_cpu[i] - v1[i]) / std::max( fabs(v2_cpu[i]), fabs(v1[i]) ); else v2_cpu[i] = 0.0; } return norm_inf(v2_cpu); } template ScalarType diff(ublas::matrix & mat1, VCLMatrixType & mat2) { ublas::matrix mat2_cpu(mat2.size1(), mat2.size2()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(mat2, mat2_cpu); ScalarType ret = 0; ScalarType act = 0; for (unsigned int i = 0; i < mat2_cpu.size1(); ++i) { for (unsigned int j = 0; j < 
mat2_cpu.size2(); ++j) { act = std::fabs(mat2_cpu(i,j) - mat1(i,j)) / std::max( std::fabs(mat2_cpu(i, j)), std::fabs(mat1(i,j)) ); if (act > ret) ret = act; } } //std::cout << ret << std::endl; return ret; } // // Triangular solvers // template void run_solver_check(RHSTypeRef & B_ref, RHSTypeCheck & B_check, int & retval, Epsilon const & epsilon) { double act_diff = fabs(diff(B_ref, B_check)); if( act_diff > epsilon ) { std::cout << " FAILED!" << std::endl; std::cout << "# Error at operation: matrix-matrix solve" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << " passed! " << act_diff << std::endl; } template< typename NumericT, typename Epsilon, typename ReferenceMatrixTypeA, typename ReferenceMatrixTypeB, typename ReferenceMatrixTypeC, typename MatrixTypeA, typename MatrixTypeB, typename MatrixTypeC, typename MatrixTypeResult> int test_solve(Epsilon const& epsilon, ReferenceMatrixTypeA const & A, ReferenceMatrixTypeB const & B_start, ReferenceMatrixTypeC const & C_start, MatrixTypeA const & vcl_A, MatrixTypeB & vcl_B, MatrixTypeC & vcl_C, MatrixTypeResult const & ) { int retval = EXIT_SUCCESS; // -------------------------------------------------------------------------- ReferenceMatrixTypeA result; ReferenceMatrixTypeC C_trans; ReferenceMatrixTypeB B = B_start; ReferenceMatrixTypeC C = C_start; MatrixTypeResult vcl_result; // Test: A \ B with various tags -------------------------------------------------------------------------- std::cout << "Testing A \\ B: " << std::endl; std::cout << " * upper_tag: "; result = ublas::solve(A, B, ublas::upper_tag()); vcl_result = viennacl::linalg::solve(vcl_A, vcl_B, viennacl::linalg::upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * unit_upper_tag: "; result = ublas::solve(A, B, ublas::unit_upper_tag()); vcl_result = viennacl::linalg::solve(vcl_A, vcl_B, viennacl::linalg::unit_upper_tag()); run_solver_check(result, vcl_result, retval, 
epsilon); std::cout << " * lower_tag: "; result = ublas::solve(A, B, ublas::lower_tag()); vcl_result = viennacl::linalg::solve(vcl_A, vcl_B, viennacl::linalg::lower_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * unit_lower_tag: "; result = ublas::solve(A, B, ublas::unit_lower_tag()); vcl_result = viennacl::linalg::solve(vcl_A, vcl_B, viennacl::linalg::unit_lower_tag()); run_solver_check(result, vcl_result, retval, epsilon); if (retval == EXIT_SUCCESS) std::cout << "Test A \\ B passed!" << std::endl; B = B_start; C = C_start; // Test: A \ B^T -------------------------------------------------------------------------- std::cout << "Testing A \\ B^T: " << std::endl; std::cout << " * upper_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); //check solve(): result = ublas::solve(A, C_trans, ublas::upper_tag()); vcl_result = viennacl::linalg::solve(vcl_A, trans(vcl_C), viennacl::linalg::upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); //check compute kernels: std::cout << " * upper_tag: "; ublas::inplace_solve(A, C_trans, ublas::upper_tag()); viennacl::linalg::inplace_solve(vcl_A, trans(vcl_C), viennacl::linalg::upper_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * unit_upper_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(A, C_trans, ublas::unit_upper_tag()); viennacl::linalg::inplace_solve(vcl_A, trans(vcl_C), viennacl::linalg::unit_upper_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * lower_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(A, C_trans, ublas::lower_tag()); viennacl::linalg::inplace_solve(vcl_A, trans(vcl_C), viennacl::linalg::lower_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * unit_lower_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(A, C_trans, ublas::unit_lower_tag()); 
viennacl::linalg::inplace_solve(vcl_A, trans(vcl_C), viennacl::linalg::unit_lower_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); if (retval == EXIT_SUCCESS) std::cout << "Test A \\ B^T passed!" << std::endl; B = B_start; C = C_start; // Test: A \ B with various tags -------------------------------------------------------------------------- std::cout << "Testing A^T \\ B: " << std::endl; std::cout << " * upper_tag: "; viennacl::copy(B, vcl_B); result = ublas::solve(trans(A), B, ublas::upper_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), vcl_B, viennacl::linalg::upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * unit_upper_tag: "; viennacl::copy(B, vcl_B); result = ublas::solve(trans(A), B, ublas::unit_upper_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), vcl_B, viennacl::linalg::unit_upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * lower_tag: "; viennacl::copy(B, vcl_B); result = ublas::solve(trans(A), B, ublas::lower_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), vcl_B, viennacl::linalg::lower_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * unit_lower_tag: "; viennacl::copy(B, vcl_B); result = ublas::solve(trans(A), B, ublas::unit_lower_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), vcl_B, viennacl::linalg::unit_lower_tag()); run_solver_check(result, vcl_result, retval, epsilon); if (retval == EXIT_SUCCESS) std::cout << "Test A^T \\ B passed!" 
<< std::endl; B = B_start; C = C_start; // Test: A^T \ B^T -------------------------------------------------------------------------- std::cout << "Testing A^T \\ B^T: " << std::endl; std::cout << " * upper_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); //check solve(): result = ublas::solve(trans(A), C_trans, ublas::upper_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); //check kernels: std::cout << " * upper_tag: "; ublas::inplace_solve(trans(A), C_trans, ublas::upper_tag()); viennacl::linalg::inplace_solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::upper_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * unit_upper_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(trans(A), C_trans, ublas::unit_upper_tag()); viennacl::linalg::inplace_solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::unit_upper_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * lower_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(trans(A), C_trans, ublas::lower_tag()); viennacl::linalg::inplace_solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::lower_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * unit_lower_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(trans(A), C_trans, ublas::unit_lower_tag()); viennacl::linalg::inplace_solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::unit_lower_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); if (retval == EXIT_SUCCESS) std::cout << "Test A^T \\ B^T passed!" 
<< std::endl; return retval; } template< typename NumericT, typename F_A, typename F_B, typename Epsilon > int test_solve(Epsilon const& epsilon) { int ret = EXIT_SUCCESS; long matrix_size = 135; //some odd number, not too large long rhs_num = 67; std::cout << "--- Part 2: Testing matrix-matrix solver ---" << std::endl; ublas::matrix A(matrix_size, matrix_size); ublas::matrix B_start(matrix_size, rhs_num); ublas::matrix C_start(rhs_num, matrix_size); for (std::size_t i = 0; i < A.size1(); ++i) { for (std::size_t j = 0; j < A.size2(); ++j) A(i,j) = static_cast(-0.5) * random(); A(i,i) = NumericT(1.0) + NumericT(2.0) * random(); //some extra weight on diagonal for stability } for (std::size_t i = 0; i < B_start.size1(); ++i) for (std::size_t j = 0; j < B_start.size2(); ++j) B_start(i,j) = random(); for (std::size_t i = 0; i < C_start.size1(); ++i) for (std::size_t j = 0; j < C_start.size2(); ++j) C_start(i,j) = random(); // A viennacl::range range1_A(matrix_size, 2*matrix_size); viennacl::range range2_A(2*matrix_size, 3*matrix_size); viennacl::slice slice1_A(matrix_size, 2, matrix_size); viennacl::slice slice2_A(0, 3, matrix_size); viennacl::matrix vcl_A(matrix_size, matrix_size); viennacl::copy(A, vcl_A); viennacl::matrix vcl_big_range_A(4*matrix_size, 4*matrix_size); viennacl::matrix_range > vcl_range_A(vcl_big_range_A, range1_A, range2_A); viennacl::copy(A, vcl_range_A); viennacl::matrix vcl_big_slice_A(4*matrix_size, 4*matrix_size); viennacl::matrix_slice > vcl_slice_A(vcl_big_slice_A, slice1_A, slice2_A); viennacl::copy(A, vcl_slice_A); // B viennacl::range range1_B(matrix_size, 2*matrix_size); viennacl::range range2_B(2*rhs_num, 3*rhs_num); viennacl::slice slice1_B(matrix_size, 2, matrix_size); viennacl::slice slice2_B(0, 3, rhs_num); viennacl::matrix vcl_B(matrix_size, rhs_num); viennacl::copy(B_start, vcl_B); viennacl::matrix vcl_big_range_B(4*matrix_size, 4*rhs_num); viennacl::matrix_range > vcl_range_B(vcl_big_range_B, range1_B, range2_B); 
viennacl::copy(B_start, vcl_range_B); viennacl::matrix vcl_big_slice_B(4*matrix_size, 4*rhs_num); viennacl::matrix_slice > vcl_slice_B(vcl_big_slice_B, slice1_B, slice2_B); viennacl::copy(B_start, vcl_slice_B); // C viennacl::range range1_C(rhs_num, 2*rhs_num); viennacl::range range2_C(2*matrix_size, 3*matrix_size); viennacl::slice slice1_C(rhs_num, 2, rhs_num); viennacl::slice slice2_C(0, 3, matrix_size); viennacl::matrix vcl_C(rhs_num, matrix_size); viennacl::copy(C_start, vcl_C); viennacl::matrix vcl_big_range_C(4*rhs_num, 4*matrix_size); viennacl::matrix_range > vcl_range_C(vcl_big_range_C, range1_C, range2_C); viennacl::copy(C_start, vcl_range_C); viennacl::matrix vcl_big_slice_C(4*rhs_num, 4*matrix_size); viennacl::matrix_slice > vcl_slice_C(vcl_big_slice_C, slice1_C, slice2_C); viennacl::copy(C_start, vcl_slice_C); std::cout << "Now using A=matrix, B=matrix" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_A, vcl_B, vcl_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=matrix, B=range" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_A, vcl_range_B, vcl_range_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=matrix, B=slice" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_A, vcl_slice_B, vcl_slice_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=range, B=matrix" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_range_A, vcl_B, vcl_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=range, B=range" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_range_A, vcl_range_B, vcl_range_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=range, B=slice" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_range_A, vcl_slice_B, vcl_slice_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=slice, B=matrix" << std::endl; ret = 
test_solve(epsilon, A, B_start, C_start, vcl_slice_A, vcl_B, vcl_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=slice, B=range" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_slice_A, vcl_range_B, vcl_range_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=slice, B=slice" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_slice_A, vcl_slice_B, vcl_slice_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; return ret; } // // Control functions // template< typename NumericT, typename Epsilon > int test(Epsilon const& epsilon) { int ret; std::cout << "////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=row ///" << std::endl; std::cout << "////////////////////////////////" << std::endl; ret = test_solve(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=col ///" << std::endl; std::cout << "////////////////////////////////" << std::endl; ret = test_solve(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=row ///" << std::endl; std::cout << "////////////////////////////////" << std::endl; ret = test_solve(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=col ///" << std::endl; std::cout << "////////////////////////////////" << std::endl; ret = test_solve(epsilon); if (ret != EXIT_SUCCESS) return ret; return ret; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: BLAS 3 routines" << std::endl; std::cout << 
"----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = NumericT(1.0E-3); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-11; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/matrix_vector_int.cu000644 001750 001750 00000071206 12267307531 022016 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // *** System // #include // // *** Boost // #include #include #include #include #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL #define VIENNACL_WITH_UBLAS 1 #include "viennacl/scalar.hpp" #include "viennacl/matrix.hpp" #include "viennacl/vector.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/lu.hpp" #include "examples/tutorial/Random.hpp" // // ------------------------------------------------------------- // using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType & s1, viennacl::scalar & s2) { viennacl::backend::finish(); if (s1 != s2) return 1; return 0; } template ScalarType diff(ublas::vector const & v1, VCLVectorType const & v2) { ublas::vector v2_cpu(v2.size()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin()); for (unsigned int i=0;i ScalarType diff(ublas::matrix const & mat1, VCLMatrixType const & mat2) { ublas::matrix mat2_cpu(mat2.size1(), mat2.size2()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(mat2, mat2_cpu); for (unsigned int i = 0; i < mat2_cpu.size1(); ++i) { for (unsigned int j = 0; j < mat2_cpu.size2(); ++j) { if (mat2_cpu(i,j) != mat1(i,j)) return 1; } } //std::cout << ret << std::endl; return 0; } // // ------------------------------------------------------------- // template int test_prod_rank1(UblasMatrixType & ublas_m1, 
UblasVectorType & ublas_v1, UblasVectorType & ublas_v2, VCLMatrixType & vcl_m1, VCLVectorType1 & vcl_v1, VCLVectorType2 & vcl_v2) { int retval = EXIT_SUCCESS; // sync data: ublas_v1 = ublas::scalar_vector(ublas_v1.size(), NumericT(2)); ublas_v2 = ublas::scalar_vector(ublas_v2.size(), NumericT(3)); viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); viennacl::copy(ublas_m1, vcl_m1); // -------------------------------------------------------------------------- std::cout << "Rank 1 update" << std::endl; ublas_m1 += ublas::outer_prod(ublas_v1, ublas_v2); vcl_m1 += viennacl::linalg::outer_prod(vcl_v1, vcl_v2); if( diff(ublas_m1, vcl_m1) != 0 ) { std::cout << "# Error at operation: rank 1 update" << std::endl; std::cout << " diff: " << diff(ublas_m1, vcl_m1) << std::endl; return EXIT_FAILURE; } // -------------------------------------------------------------------------- std::cout << "Scaled rank 1 update - CPU Scalar" << std::endl; ublas_m1 += NumericT(4) * ublas::outer_prod(ublas_v1, ublas_v2); vcl_m1 += NumericT(2) * viennacl::linalg::outer_prod(vcl_v1, vcl_v2); vcl_m1 += viennacl::linalg::outer_prod(vcl_v1, vcl_v2) * NumericT(2); //check proper compilation if( diff(ublas_m1, vcl_m1) != 0 ) { std::cout << "# Error at operation: scaled rank 1 update - CPU Scalar" << std::endl; std::cout << " diff: " << diff(ublas_m1, vcl_m1) << std::endl; return EXIT_FAILURE; } // -------------------------------------------------------------------------- std::cout << "Scaled rank 1 update - GPU Scalar" << std::endl; ublas_m1 += NumericT(4) * ublas::outer_prod(ublas_v1, ublas_v2); vcl_m1 += viennacl::scalar(2) * viennacl::linalg::outer_prod(vcl_v1, vcl_v2); vcl_m1 += viennacl::linalg::outer_prod(vcl_v1, vcl_v2) * viennacl::scalar(2); //check proper compilation if( diff(ublas_m1, vcl_m1) != 0 ) { std::cout << "# Error at operation: scaled rank 1 update - GPU Scalar" << std::endl; std::cout << " diff: " << 
diff(ublas_m1, vcl_m1) << std::endl; return EXIT_FAILURE; } //reset vcl_matrix: viennacl::copy(ublas_m1, vcl_m1); // -------------------------------------------------------------------------- std::cout << "Matrix-Vector product" << std::endl; ublas_v1 = viennacl::linalg::prod(ublas_m1, ublas_v2); vcl_v1 = viennacl::linalg::prod(vcl_m1, vcl_v2); if( diff(ublas_v1, vcl_v1) != 0 ) { std::cout << "# Error at operation: matrix-vector product" << std::endl; std::cout << " diff: " << diff(ublas_v1, vcl_v1) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- std::cout << "Matrix-Vector product with scaled add" << std::endl; NumericT alpha = static_cast(2); NumericT beta = static_cast(3); viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * viennacl::linalg::prod(ublas_m1, ublas_v2) + beta * ublas_v1; vcl_v1 = alpha * viennacl::linalg::prod(vcl_m1, vcl_v2) + beta * vcl_v1; if( diff(ublas_v1, vcl_v1) != 0 ) { std::cout << "# Error at operation: matrix-vector product with scaled additions" << std::endl; std::cout << " diff: " << diff(ublas_v1, vcl_v1) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Transposed Matrix-Vector product" << std::endl; ublas_v2 = alpha * viennacl::linalg::prod(trans(ublas_m1), ublas_v1); vcl_v2 = alpha * viennacl::linalg::prod(trans(vcl_m1), vcl_v1); if( diff(ublas_v2, vcl_v2) != 0 ) { std::cout << "# Error at operation: transposed matrix-vector product" << std::endl; std::cout << " diff: " << diff(ublas_v2, vcl_v2) << std::endl; retval = EXIT_FAILURE; } std::cout << "Transposed Matrix-Vector product with scaled add" << std::endl; ublas_v2 = alpha * 
viennacl::linalg::prod(trans(ublas_m1), ublas_v1) + beta * ublas_v2; vcl_v2 = alpha * viennacl::linalg::prod(trans(vcl_m1), vcl_v1) + beta * vcl_v2; if( diff(ublas_v2, vcl_v2) != 0 ) { std::cout << "# Error at operation: transposed matrix-vector product with scaled additions" << std::endl; std::cout << " diff: " << diff(ublas_v2, vcl_v2) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- return retval; } // // ------------------------------------------------------------- // template< typename NumericT, typename F> int test() { int retval = EXIT_SUCCESS; std::size_t num_rows = 141; std::size_t num_cols = 103; // -------------------------------------------------------------------------- ublas::vector ublas_v1(num_rows); for (std::size_t i = 0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas::vector ublas_v2 = ublas::scalar_vector(num_cols, NumericT(3)); ublas::matrix ublas_m1(ublas_v1.size(), ublas_v2.size()); ublas::matrix ublas_m2(ublas_v1.size(), ublas_v1.size()); for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); for (std::size_t i = 0; i < ublas_m2.size1(); ++i) for (std::size_t j = 0; j < ublas_m2.size2(); ++j) ublas_m2(i,j) = NumericT(j - i*j + i); viennacl::vector vcl_v1_native(ublas_v1.size()); viennacl::vector vcl_v1_large(4 * ublas_v1.size()); viennacl::vector_range< viennacl::vector > vcl_v1_range(vcl_v1_large, viennacl::range(3, ublas_v1.size() + 3)); viennacl::vector_slice< viennacl::vector > vcl_v1_slice(vcl_v1_large, viennacl::slice(2, 3, ublas_v1.size())); viennacl::vector vcl_v2_native(ublas_v2.size()); viennacl::vector vcl_v2_large(4 * ublas_v2.size()); viennacl::vector_range< viennacl::vector > vcl_v2_range(vcl_v2_large, viennacl::range(8, ublas_v2.size() + 8)); viennacl::vector_slice< viennacl::vector > vcl_v2_slice(vcl_v2_large, viennacl::slice(6, 2, ublas_v2.size())); viennacl::matrix 
vcl_m1_native(ublas_m1.size1(), ublas_m1.size2()); viennacl::matrix vcl_m1_large(4 * ublas_m1.size1(), 4 * ublas_m1.size2()); viennacl::matrix_range< viennacl::matrix > vcl_m1_range(vcl_m1_large, viennacl::range(8, ublas_m1.size1() + 8), viennacl::range(ublas_m1.size2(), 2 * ublas_m1.size2()) ); viennacl::matrix_slice< viennacl::matrix > vcl_m1_slice(vcl_m1_large, viennacl::slice(6, 2, ublas_m1.size1()), viennacl::slice(ublas_m1.size2(), 2, ublas_m1.size2()) ); viennacl::matrix vcl_m2_native(ublas_m2.size1(), ublas_m2.size2()); viennacl::matrix vcl_m2_large(4 * ublas_m2.size1(), 4 * ublas_m2.size2()); viennacl::matrix_range< viennacl::matrix > vcl_m2_range(vcl_m2_large, viennacl::range(8, ublas_m2.size1() + 8), viennacl::range(ublas_m2.size2(), 2 * ublas_m2.size2()) ); viennacl::matrix_slice< viennacl::matrix > vcl_m2_slice(vcl_m2_large, viennacl::slice(6, 2, ublas_m2.size1()), viennacl::slice(ublas_m2.size2(), 2, ublas_m2.size2()) ); // // Run a bunch of tests for rank-1-updates, matrix-vector products // std::cout << "------------ Testing rank-1-updates and matrix-vector products ------------------" << std::endl; std::cout << "* m = full, v1 = full, v2 = full" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_native, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = full, v1 = full, v2 = range" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_native, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = full, v1 = full, v2 = slice" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_native, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); // v1 = range std::cout << "* m = full, v1 = range, v2 = full" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_range, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = full, v1 = range, v2 = range" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_range, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = full, v1 = range, v2 = slice" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_range, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); // v1 = slice std::cout << "* m = full, v1 = slice, v2 = full" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_slice, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = full, v1 = slice, v2 = range" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_slice, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = full, v1 = slice, v2 = slice" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_slice, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); ///////////////////////////// matrix_range std::cout << "* m = range, v1 = full, v2 = full" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_native, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = range, v1 = full, v2 = range" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_native, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = range, v1 = full, v2 = slice" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_native, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); // v1 = range std::cout << "* m = range, v1 = range, v2 = full" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_range, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = range, v1 = range, v2 = range" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_range, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = range, v1 = range, v2 = slice" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_range, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); // v1 = slice std::cout << "* m = range, v1 = slice, v2 = full" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_slice, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = range, v1 = slice, v2 = range" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_slice, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = range, v1 = slice, v2 = slice" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_slice, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); ///////////////////////////// matrix_slice std::cout << "* m = slice, v1 = full, v2 = full" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_native, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = slice, v1 = full, v2 = range" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_native, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = slice, v1 = full, v2 = slice" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_native, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); // v1 = range std::cout << "* m = slice, v1 = range, v2 = full" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_range, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = slice, v1 = range, v2 = range" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_range, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = slice, v1 = range, v2 = slice" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_range, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); // v1 = slice std::cout << "* m = slice, v1 = slice, v2 = full" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_slice, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = slice, v1 = slice, v2 = range" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_slice, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = slice, v1 = slice, v2 = slice" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_slice, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; return retval; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Matrix" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef int NumericT; std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: int" << std::endl; std::cout << " layout: row-major" << std::endl; retval = test(); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef int NumericT; std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: int" << std::endl; std::cout << " layout: column-major" << std::endl; retval = test(); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL 
if( viennacl::ocl::current_device().double_support() ) #endif { { typedef long NumericT; std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: double" << std::endl; std::cout << " layout: row-major" << std::endl; retval = test(); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef long NumericT; std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: double" << std::endl; std::cout << " layout: column-major" << std::endl; retval = test(); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/nmf.cpp000644 001750 001750 00000006650 12267307531 017212 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include #include #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/nmf.hpp" typedef float ScalarType; const ScalarType EPS = ScalarType(0.1); float matrix_compare(viennacl::matrix& res, viennacl::matrix& ref) { std::vector res_std(res.internal_size()); std::vector ref_std(ref.internal_size()); viennacl::fast_copy(res, &res_std[0]); viennacl::fast_copy(ref, &ref_std[0]); float diff = 0.0; float mx = 0.0; for(std::size_t i = 0; i < res_std.size(); i++) { diff = std::max(diff, std::abs(res_std[i] - ref_std[i])); mx = std::max(mx, res_std[i]); } return diff / mx; } void fill_random(std::vector< std::vector >& v) { for(std::size_t i = 0; i < v.size(); i++) { for (std::size_t j = 0; j < v[i].size(); ++j) v[i][j] = static_cast(rand()) / RAND_MAX; } } void test_nmf(std::size_t m, std::size_t k, std::size_t n) { std::vector< std::vector > stl_w(m, std::vector(k)); std::vector< std::vector > stl_h(k, std::vector(n)); viennacl::matrix v_ref(m, n); viennacl::matrix w_ref(m, k); viennacl::matrix h_ref(k, n); fill_random(stl_w); fill_random(stl_h); viennacl::copy(stl_w, w_ref); viennacl::copy(stl_h, h_ref); v_ref = viennacl::linalg::prod(w_ref, h_ref); //reference // Fill again with random numbers: fill_random(stl_w); fill_random(stl_h); viennacl::matrix w_nmf(m, k); viennacl::matrix h_nmf(k, n); viennacl::copy(stl_w, w_nmf); viennacl::copy(stl_h, h_nmf); viennacl::linalg::nmf_config conf; conf.print_relative_error(true); conf.max_iterations(5000); //5000 iterations are enough for the test viennacl::linalg::nmf(v_ref, w_nmf, h_nmf, conf); viennacl::matrix v_nmf = viennacl::linalg::prod(w_nmf, h_nmf); float diff = matrix_compare(v_ref, v_nmf); 
bool diff_ok = fabs(diff) < EPS; long iterations = static_cast(conf.iters()); printf("%6s [%lux%lux%lu] diff = %.6f (%ld iterations)\n", diff_ok ? "[[OK]]":"[FAIL]", m, k, n, diff, iterations); if (!diff_ok) exit(EXIT_FAILURE); } int main() { //srand(time(NULL)); //let's use deterministic tests, so keep the default srand() initialization test_nmf(3, 3, 3); test_nmf(3, 2, 3); test_nmf(16, 7, 12); test_nmf(140, 73, 180); test_nmf(427, 21, 523); std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/matrix_float_double.hpp000644 001750 001750 00000125054 12267307531 022462 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #define VIENNACL_WITH_UBLAS //#define NDEBUG //#define VIENNACL_BUILD_INFO // We don't need debug mode in UBLAS: #define BOOST_UBLAS_NDEBUG #include #include #include #include #include #include #include #include //#include "../benchmarks/benchmark-utils.hpp" #include "viennacl/scalar.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/norm_1.hpp" #include "viennacl/linalg/norm_inf.hpp" #include "viennacl/linalg/norm_frobenius.hpp" #include "viennacl/matrix_proxy.hpp" #include "viennacl/vector_proxy.hpp" #include "viennacl/linalg/norm_1.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/norm_inf.hpp" #include 
"viennacl/linalg/norm_frobenius.hpp" #include "boost/numeric/ublas/vector.hpp" #include "boost/numeric/ublas/matrix.hpp" #include "boost/numeric/ublas/matrix_proxy.hpp" #include "boost/numeric/ublas/vector_proxy.hpp" #include "boost/numeric/ublas/io.hpp" using namespace boost::numeric; template bool check_for_equality(MatrixType const & ublas_A, VCLMatrixType const & vcl_A, double epsilon) { typedef typename MatrixType::value_type value_type; boost::numeric::ublas::matrix vcl_A_cpu(vcl_A.size1(), vcl_A.size2()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(vcl_A, vcl_A_cpu); for (std::size_t i=0; i epsilon) || (vcl_A_cpu(i,j) != vcl_A_cpu(i,j)) ) { std::cout << "Error at index (" << i << ", " << j << "): " << ublas_A(i,j) << " vs " << vcl_A_cpu(i,j) << std::endl; std::cout << std::endl << "TEST failed!" << std::endl; return false; } } } } std::cout << "PASSED!" << std::endl; return true; } template int run_test(double epsilon, UBLASMatrixType & ublas_A, UBLASMatrixType & ublas_B, UBLASMatrixType & ublas_C, ViennaCLMatrixType1 & vcl_A, ViennaCLMatrixType2 & vcl_B, ViennaCLMatrixType3 vcl_C) { typedef typename viennacl::result_of::cpu_value_type::type cpu_value_type; cpu_value_type alpha = cpu_value_type(3.1415); viennacl::scalar gpu_alpha = alpha; cpu_value_type beta = cpu_value_type(2.7182); viennacl::scalar gpu_beta = beta; // // Initializer: // std::cout << "Checking for zero_matrix initializer..." << std::endl; ublas_A = ublas::zero_matrix(ublas_A.size1(), ublas_A.size2()); vcl_A = viennacl::zero_matrix(vcl_A.size1(), vcl_A.size2()); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; std::cout << "Checking for scalar_matrix initializer..." 
<< std::endl; /* Interior of the operand overload of run_test(): every step below performs one operation on the ublas reference and the matching operation on the ViennaCL operand, then returns EXIT_FAILURE if check_for_equality() reports a mismatch. The scalar_matrix initializer is checked once with the host scalar alpha and once with the device scalar gpu_beta. */ ublas_A = ublas::scalar_matrix(ublas_A.size1(), ublas_A.size2(), alpha); vcl_A = viennacl::scalar_matrix(vcl_A.size1(), vcl_A.size2(), alpha); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; ublas_A = ublas::scalar_matrix(ublas_A.size1(), ublas_A.size2(), gpu_beta); vcl_A = viennacl::scalar_matrix( vcl_A.size1(), vcl_A.size2(), gpu_beta); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; /*std::cout << "Checking for identity initializer..." << std::endl; ublas_A = ublas::identity_matrix(ublas_A.size1()); vcl_A = viennacl::identity_matrix(vcl_A.size1()); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE;*/ std::cout << std::endl; //std::cout << "//" << std::endl; //std::cout << "////////// Test: Assignments //////////" << std::endl; //std::cout << "//" << std::endl; if (!check_for_equality(ublas_B, vcl_B, epsilon)) return EXIT_FAILURE; std::cout << "Testing matrix assignment... "; //std::cout << ublas_B(0,0) << " vs. " << vcl_B(0,0) << std::endl; ublas_A = ublas_B; vcl_A = vcl_B; if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; //std::cout << std::endl; //std::cout << "//" << std::endl; //std::cout << "////////// Test 1: Copy to GPU //////////" << std::endl; //std::cout << "//" << std::endl; ublas_A = ublas_B; viennacl::copy(ublas_B, vcl_A); std::cout << "Testing upper left copy to GPU... "; if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; ublas_C = ublas_B; viennacl::copy(ublas_B, vcl_C); std::cout << "Testing lower right copy to GPU... "; if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; //std::cout << std::endl; //std::cout << "//" << std::endl; //std::cout << "////////// Test 2: Copy from GPU //////////" << std::endl; //std::cout << "//" << std::endl; std::cout << "Testing upper left copy to A... "; if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; std::cout << "Testing lower right copy to C... 
"; if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; /* Addition: in-place, scaled in-place, plain, sign-flipped, and scaled variants; each scaled variant is run with the host scalars (alpha, beta) and again with the device scalars (gpu_alpha, gpu_beta) against the same ublas reference. */ //std::cout << "//" << std::endl; //std::cout << "////////// Test 3: Addition //////////" << std::endl; //std::cout << "//" << std::endl; viennacl::copy(ublas_C, vcl_C); std::cout << "Inplace add: "; ublas_C += ublas_C; vcl_C += vcl_C; if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; std::cout << "Scaled inplace add: "; ublas_C += beta * ublas_A; vcl_C += gpu_beta * vcl_A; if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; std::cout << "Add: "; ublas_C = ublas_A + ublas_B; vcl_C = vcl_A + vcl_B; if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; std::cout << "Add with flipsign: "; ublas_C = - ublas_A + ublas_B; vcl_C = - vcl_A + vcl_B; if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; std::cout << "Scaled add (left): "; ublas_C = alpha * ublas_A + ublas_B; vcl_C = alpha * vcl_A + vcl_B; if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; std::cout << "Scaled add (left): "; vcl_C = gpu_alpha * vcl_A + vcl_B; if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; std::cout << "Scaled add (right): "; ublas_C = ublas_A + beta * ublas_B; vcl_C = vcl_A + beta * vcl_B; if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; std::cout << "Scaled add (right): "; vcl_C = vcl_A + gpu_beta * vcl_B; if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; std::cout << "Scaled add (both): "; ublas_C = alpha * ublas_A + beta * ublas_B; vcl_C = alpha * vcl_A + beta * vcl_B; if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; std::cout << "Scaled add (both): "; vcl_C = gpu_alpha * vcl_A + gpu_beta * vcl_B; if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; /* Subtraction: same pattern as addition; from "Scaled sub (left)" on the results are written to ublas_B / vcl_B instead of the C operands. */ //std::cout << "//" << std::endl; //std::cout << "////////// Test 4: Subtraction //////////" << std::endl; //std::cout << "//" << std::endl; viennacl::copy(ublas_C, vcl_C); 
std::cout << "Inplace sub: "; ublas_C -= ublas_B; vcl_C -= vcl_B; if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; std::cout << "Scaled Inplace sub: "; ublas_C -= alpha * ublas_B; vcl_C -= alpha * vcl_B; if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; std::cout << "Sub: "; ublas_C = ublas_A - ublas_B; vcl_C = vcl_A - vcl_B; if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; std::cout << "Scaled sub (left): "; ublas_B = alpha * ublas_A - ublas_C; vcl_B = alpha * vcl_A - vcl_C; if (!check_for_equality(ublas_B, vcl_B, epsilon)) return EXIT_FAILURE; std::cout << "Scaled sub (left): "; vcl_B = gpu_alpha * vcl_A - vcl_C; if (!check_for_equality(ublas_B, vcl_B, epsilon)) return EXIT_FAILURE; std::cout << "Scaled sub (right): "; ublas_B = ublas_A - beta * ublas_C; vcl_B = vcl_A - vcl_C * beta; if (!check_for_equality(ublas_B, vcl_B, epsilon)) return EXIT_FAILURE; std::cout << "Scaled sub (right): "; vcl_B = vcl_A - vcl_C * gpu_beta; if (!check_for_equality(ublas_B, vcl_B, epsilon)) return EXIT_FAILURE; std::cout << "Scaled sub (both): "; ublas_B = alpha * ublas_A - beta * ublas_C; vcl_B = alpha * vcl_A - vcl_C * beta; if (!check_for_equality(ublas_B, vcl_B, epsilon)) return EXIT_FAILURE; std::cout << "Scaled sub (both): "; vcl_B = gpu_alpha * vcl_A - vcl_C * gpu_beta; if (!check_for_equality(ublas_B, vcl_B, epsilon)) return EXIT_FAILURE; std::cout << "Unary operator-: "; ublas_C = - ublas_A; vcl_C = - vcl_A; if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; /* Scaling: multiply and divide in place, once by a host scalar and once by a device scalar. */ //std::cout << "//" << std::endl; //std::cout << "////////// Test 5: Scaling //////////" << std::endl; //std::cout << "//" << std::endl; viennacl::copy(ublas_A, vcl_A); std::cout << "Multiplication with CPU scalar: "; ublas_A *= alpha; vcl_A *= alpha; if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; std::cout << "Multiplication with GPU scalar: "; ublas_A *= beta; vcl_A *= gpu_beta; if 
(!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; std::cout << "Division with CPU scalar: "; ublas_A /= alpha; vcl_A /= alpha; if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; std::cout << "Division with GPU scalar: "; ublas_A /= beta; vcl_A /= gpu_beta; if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; /* Element-wise product: B is reset to the constant 1.4142 and A to 3.1415 * B, then element_prod is checked with =, += and -= for plain operands and for sum expressions on the left, the right, and both sides. NOTE(review): ublas_B is copied to vcl_B twice in a row below; the second copy looks redundant. */ std::cout << "Testing elementwise multiplication..." << std::endl; ublas_B = ublas::scalar_matrix(ublas_B.size1(), ublas_B.size2(), cpu_value_type(1.4142)); ublas_A = cpu_value_type(3.1415) * ublas_B; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_B, vcl_B); ublas_A = ublas::element_prod(ublas_A, ublas_B); vcl_A = viennacl::linalg::element_prod(vcl_A, vcl_B); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; ublas_A += ublas::element_prod(ublas_A, ublas_B); vcl_A += viennacl::linalg::element_prod(vcl_A, vcl_B); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; ublas_A -= ublas::element_prod(ublas_A, ublas_B); vcl_A -= viennacl::linalg::element_prod(vcl_A, vcl_B); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; /////// ublas_A = ublas::element_prod(ublas_A + ublas_B, ublas_B); vcl_A = viennacl::linalg::element_prod(vcl_A + vcl_B, vcl_B); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; ublas_A += ublas::element_prod(ublas_A + ublas_B, ublas_B); vcl_A += viennacl::linalg::element_prod(vcl_A + vcl_B, vcl_B); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; ublas_A -= ublas::element_prod(ublas_A + ublas_B, ublas_B); vcl_A -= viennacl::linalg::element_prod(vcl_A + vcl_B, vcl_B); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; /////// ublas_A = ublas::element_prod(ublas_A, ublas_B + ublas_A); vcl_A = viennacl::linalg::element_prod(vcl_A, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; ublas_A += 
ublas::element_prod(ublas_A, ublas_B + ublas_A); vcl_A += viennacl::linalg::element_prod(vcl_A, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; ublas_A -= ublas::element_prod(ublas_A, ublas_B + ublas_A); vcl_A -= viennacl::linalg::element_prod(vcl_A, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; /////// ublas_A = ublas::element_prod(ublas_A + ublas_B, ublas_B + ublas_A); vcl_A = viennacl::linalg::element_prod(vcl_A + vcl_B, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; ublas_A += ublas::element_prod(ublas_A + ublas_B, ublas_B + ublas_A); vcl_A += viennacl::linalg::element_prod(vcl_A + vcl_B, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; ublas_A -= ublas::element_prod(ublas_A + ublas_B, ublas_B + ublas_A); vcl_A -= viennacl::linalg::element_prod(vcl_A + vcl_B, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; /* Element-wise division: operands are reset to the same constants as above, then element_div goes through the same =, +=, -= and expression-operand combinations. */ ublas_B = ublas::scalar_matrix(ublas_B.size1(), ublas_B.size2(), cpu_value_type(1.4142)); ublas_A = cpu_value_type(3.1415) * ublas_B; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_B); ublas_A = ublas::element_div(ublas_A, ublas_B); vcl_A = viennacl::linalg::element_div(vcl_A, vcl_B); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; ublas_A += ublas::element_div(ublas_A, ublas_B); vcl_A += viennacl::linalg::element_div(vcl_A, vcl_B); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; ublas_A -= ublas::element_div(ublas_A, ublas_B); vcl_A -= viennacl::linalg::element_div(vcl_A, vcl_B); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; /////// ublas_A = ublas::element_div(ublas_A + ublas_B, ublas_B); vcl_A = viennacl::linalg::element_div(vcl_A + vcl_B, vcl_B); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; ublas_A += ublas::element_div(ublas_A + ublas_B, ublas_B); vcl_A 
+= viennacl::linalg::element_div(vcl_A + vcl_B, vcl_B); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; ublas_A -= ublas::element_div(ublas_A + ublas_B, ublas_B); vcl_A -= viennacl::linalg::element_div(vcl_A + vcl_B, vcl_B); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; /////// ublas_A = ublas::element_div(ublas_A, ublas_B + ublas_A); vcl_A = viennacl::linalg::element_div(vcl_A, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; ublas_A += ublas::element_div(ublas_A, ublas_B + ublas_A); vcl_A += viennacl::linalg::element_div(vcl_A, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; ublas_A -= ublas::element_div(ublas_A, ublas_B + ublas_A); vcl_A -= viennacl::linalg::element_div(vcl_A, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; /////// ublas_A = ublas::element_div(ublas_A + ublas_B, ublas_B + ublas_A); vcl_A = viennacl::linalg::element_div(vcl_A + vcl_B, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; ublas_A += ublas::element_div(ublas_A + ublas_B, ublas_B + ublas_A); vcl_A += viennacl::linalg::element_div(vcl_A + vcl_B, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; ublas_A -= ublas::element_div(ublas_A + ublas_B, ublas_B + ublas_A); vcl_A -= viennacl::linalg::element_div(vcl_A + vcl_B, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; // element_pow std::cout << "Testing unary element_pow()..." 
<< std::endl; ublas_B = ublas::scalar_matrix(ublas_B.size1(), ublas_B.size2(), cpu_value_type(1.4142)); ublas_A = cpu_value_type(3.1415) * ublas_B; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_B); for (std::size_t i=0; i(ublas_B.size1(), ublas_B.size2(), cpu_value_type(1.4142)); \ ublas_A = cpu_value_type(3.1415) * ublas_B; \ ublas_C = cpu_value_type(2.7172) * ublas_A; \ viennacl::copy(ublas_A, vcl_A); \ viennacl::copy(ublas_B, vcl_B); \ viennacl::copy(ublas_C, vcl_C); \ viennacl::copy(ublas_B, vcl_B); \ \ for (std::size_t i=0; i /* run_test(epsilon): test driver. Creates three 131 x 33 ublas matrices plus, for each of A, B and C, a ViennaCL matrix, a matrix_range proxy and a matrix_slice proxy; checks construction from proxies, the identity/scalar/zero initializers and norm_frobenius(); then calls the operand overload of run_test() for all 27 matrix/range/slice combinations. Returns EXIT_FAILURE at the first failing step, EXIT_SUCCESS otherwise. NOTE(review): the text preceding this comment (end of the operand overload, the element-wise test macro, and this function's template header) and the matrix fill loops below were damaged in extraction and are kept as found. */ int run_test(double epsilon) { //typedef float ScalarType; typedef boost::numeric::ublas::matrix MatrixType; typedef viennacl::matrix VCLMatrixType; std::size_t dim_rows = 131; std::size_t dim_cols = 33; //std::size_t dim_rows = 5; //std::size_t dim_cols = 3; //setup ublas objects: MatrixType ublas_A(dim_rows, dim_cols); MatrixType ublas_B(dim_rows, dim_cols); MatrixType ublas_C(dim_rows, dim_cols); for (std::size_t i=0; i vcl_range_A(vcl_A_full, vcl_A_r1, vcl_A_r2); viennacl::slice vcl_A_s1(2, 3, dim_rows); viennacl::slice vcl_A_s2(2 * dim_cols, 2, dim_cols); viennacl::matrix_slice vcl_slice_A(vcl_A_full, vcl_A_s1, vcl_A_s2); // // Create B // VCLMatrixType vcl_B(dim_rows, dim_cols); viennacl::range vcl_B_r1(dim_rows, 2 * dim_rows); viennacl::range vcl_B_r2(2 * dim_cols, 3 * dim_cols); viennacl::matrix_range vcl_range_B(vcl_B_full, vcl_B_r1, vcl_B_r2); viennacl::slice vcl_B_s1(2 * dim_rows, 2, dim_rows); viennacl::slice vcl_B_s2(dim_cols, 3, dim_cols); viennacl::matrix_slice vcl_slice_B(vcl_B_full, vcl_B_s1, vcl_B_s2); // // Create C // VCLMatrixType vcl_C(dim_rows, dim_cols); viennacl::range vcl_C_r1(2 * dim_rows, 3 * dim_rows); viennacl::range vcl_C_r2(3 * dim_cols, 4 * dim_cols); viennacl::matrix_range vcl_range_C(vcl_C_full, vcl_C_r1, vcl_C_r2); viennacl::slice vcl_C_s1(dim_rows, 2, dim_rows); viennacl::slice vcl_C_s2(0, 3, dim_cols); viennacl::matrix_slice vcl_slice_C(vcl_C_full, vcl_C_s1, vcl_C_s2); 
viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_C); viennacl::copy(ublas_C, vcl_range_C); viennacl::copy(ublas_C, vcl_slice_C); std::cout << std::endl; std::cout << "//" << std::endl; std::cout << "////////// Test: Copy CTOR //////////" << std::endl; std::cout << "//" << std::endl; { std::cout << "Testing matrix created from range... "; VCLMatrixType vcl_temp = vcl_range_A; if (check_for_equality(ublas_A, vcl_temp, epsilon)) std::cout << "PASSED!" << std::endl; else { std::cout << "ublas_A: " << ublas_A << std::endl; std::cout << "vcl_temp: " << vcl_temp << std::endl; std::cout << "vcl_range_A: " << vcl_range_A << std::endl; std::cout << "vcl_A: " << vcl_A << std::endl; std::cout << std::endl << "TEST failed!" << std::endl; return EXIT_FAILURE; } std::cout << "Testing matrix created from slice... "; VCLMatrixType vcl_temp2 = vcl_slice_B; /* construct from the slice proxy, as the label says; this step previously used vcl_range_B, so construction from a matrix_slice was never exercised */ if (check_for_equality(ublas_B, vcl_temp2, epsilon)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!" << std::endl; return EXIT_FAILURE; } } std::cout << "//" << std::endl; std::cout << "////////// Test: Initializer for matrix type //////////" << std::endl; std::cout << "//" << std::endl; { ublas::matrix ublas_dummy1 = ublas::identity_matrix(ublas_A.size1()); ublas::matrix ublas_dummy2 = ublas::scalar_matrix(ublas_A.size1(), ublas_A.size1(), 3.0); ublas::matrix ublas_dummy3 = ublas::zero_matrix(ublas_A.size1(), ublas_A.size1()); viennacl::matrix vcl_dummy1 = viennacl::identity_matrix(ublas_A.size1()); viennacl::matrix vcl_dummy2 = viennacl::scalar_matrix(ublas_A.size1(), ublas_A.size1(), 3.0); viennacl::matrix vcl_dummy3 = viennacl::zero_matrix(ublas_A.size1(), ublas_A.size1()); std::cout << "Testing initializer CTOR... 
"; if ( check_for_equality(ublas_dummy1, vcl_dummy1, epsilon) && check_for_equality(ublas_dummy2, vcl_dummy2, epsilon) && check_for_equality(ublas_dummy3, vcl_dummy3, epsilon) ) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!" << std::endl; return EXIT_FAILURE; } ublas_dummy1 = ublas::zero_matrix(ublas_A.size1(), ublas_A.size1()); ublas_dummy2 = ublas::identity_matrix(ublas_A.size1()); ublas_dummy3 = ublas::scalar_matrix(ublas_A.size1(), ublas_A.size1(), 3.0); vcl_dummy1 = viennacl::zero_matrix(ublas_A.size1(), ublas_A.size1()); vcl_dummy2 = viennacl::identity_matrix(ublas_A.size1()); vcl_dummy3 = viennacl::scalar_matrix(ublas_A.size1(), ublas_A.size1(), 3.0); std::cout << "Testing initializer assignment... "; if ( check_for_equality(ublas_dummy1, vcl_dummy1, epsilon) && check_for_equality(ublas_dummy2, vcl_dummy2, epsilon) && check_for_equality(ublas_dummy3, vcl_dummy3, epsilon) ) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!" 
<< std::endl; return EXIT_FAILURE; } } std::cout << "//" << std::endl; std::cout << "////////// Test: Norms //////////" << std::endl; std::cout << "//" << std::endl; /*ScalarType ublas_norm_1 = viennacl::linalg::norm_1(ublas_C); ScalarType vcl_norm_1 = viennacl::linalg::norm_1(vcl_C); if ( std::fabs(ublas_norm_1 - vcl_norm_1) / ublas_norm_1 > epsilon) { std::cerr << "Failure at norm_1(): " << std::fabs(ublas_norm_1 - vcl_norm_1) / ublas_norm_1 << std::endl; return EXIT_FAILURE; } ScalarType ublas_norm_inf = ublas::norm_inf(ublas_C); ScalarType vcl_norm_inf = viennacl::linalg::norm_inf(vcl_C); if ( std::fabs(ublas_norm_inf - vcl_norm_inf) / ublas_norm_inf > epsilon) { std::cerr << "Failure at norm_inf(): " << std::fabs(ublas_norm_inf - vcl_norm_inf) / ublas_norm_inf << std::endl; return EXIT_FAILURE; }*/ ScalarType ublas_norm_frobenius = viennacl::linalg::norm_frobenius(ublas_C); ScalarType vcl_norm_frobenius = viennacl::linalg::norm_frobenius(vcl_C); if ( std::fabs(ublas_norm_frobenius - vcl_norm_frobenius) / ublas_norm_frobenius > epsilon) { std::cerr << "Failure at norm_frobenius()" << std::endl; return EXIT_FAILURE; } viennacl::scalar device_ublas_norm_frobenius = viennacl::linalg::norm_frobenius(ublas_C); viennacl::scalar device_vcl_norm_frobenius = viennacl::linalg::norm_frobenius(vcl_C); if ( std::fabs(device_ublas_norm_frobenius - device_vcl_norm_frobenius) / device_ublas_norm_frobenius > epsilon) { std::cerr << "Failure at norm_frobenius()" << std::endl; return EXIT_FAILURE; } std::cout << "PASSED!" << std::endl; // // run operation tests: // std::cout << "//" << std::endl; std::cout << "////////// Test: Operations //////////" << std::endl; std::cout << "//" << std::endl; /////// A=matrix: std::cout << "Testing A=matrix, B=matrix, C=matrix ..." 
<< std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_A, vcl_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=matrix, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_A, vcl_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=matrix, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_A, vcl_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=range, C=matrix ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_A, vcl_range_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=range, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_A, vcl_range_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=range, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_A, vcl_range_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=slice, C=matrix ..." 
<< std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_A, vcl_slice_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=slice, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_A, vcl_slice_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=slice, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_A, vcl_slice_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } /////// A=range: std::cout << "Testing A=range, B=matrix, C=matrix ..." << std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=range, B=matrix, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=range, B=matrix, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=range, B=range, C=matrix ..." 
<< std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_range_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=range, B=range, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_range_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=range, B=range, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_range_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=range, B=slice, C=matrix ..." << std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_slice_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=range, B=slice, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_slice_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=range, B=slice, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_slice_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } /////// A=slice: std::cout << "Testing A=slice, B=matrix, C=matrix ..." 
<< std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=matrix, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=matrix, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=range, C=matrix ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_range_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=range, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_range_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=range, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_range_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=slice, C=matrix ..." 
<< std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_slice_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=slice, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_slice_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=slice, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_slice_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/generator_blas2.cpp000644 001750 001750 00000020757 12267307531 021507 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2012, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // *** System // #include #include // // *** Boost // #include #include // // *** ViennaCL // #define VIENNACL_WITH_UBLAS 1 //#define VIENNACL_DEBUG_ALL //#define VIENNACL_DEBUG_BUILD #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/generator/generate.hpp" #define CHECK_RESULT(cpu,gpu, op) \ if ( double delta = std::fabs ( diff ( cpu, gpu) ) > epsilon ) {\ std::cout << "# Error at operation: " #op << std::endl;\ std::cout << " diff: " << delta << std::endl;\ retval = EXIT_FAILURE;\ }\ using namespace boost::numeric; using namespace viennacl; template ScalarType diff(ublas::matrix & mat1, VCLMatrixType & mat2) { ublas::matrix mat2_cpu(mat2.size1(), mat2.size2()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(mat2, mat2_cpu); ScalarType ret = 0; ScalarType act = 0; for (unsigned int i = 0; i < mat2_cpu.size1(); ++i) { for (unsigned int j = 0; j < mat2_cpu.size2(); ++j) { act = std::fabs(mat2_cpu(i,j) - mat1(i,j)) / std::max( std::fabs(mat2_cpu(i, j)), std::fabs(mat1(i,j)) ); if (act > ret) ret = act; } } //std::cout << ret << std::endl; return ret; } template ScalarType diff ( ublas::vector & v1, viennacl::vector & v2 ) { ublas::vector v2_cpu ( v2.size() ); viennacl::copy( v2.begin(), v2.end(), v2_cpu.begin() ); for ( unsigned int i=0; i 0 ) v2_cpu[i] = std::fabs ( v2_cpu[i] - v1[i] ) / std::max ( std::fabs ( v2_cpu[i] ), std::fabs ( v1[i] ) ); else v2_cpu[i] = 0.0; } return norm_inf ( v2_cpu ); } template< typename NumericT, class Layout, typename Epsilon > int test( Epsilon const& epsilon) { int 
retval = EXIT_SUCCESS; ublas::vector cx; ublas::vector cy; ublas::matrix cA; ublas::matrix cB; ublas::matrix cC; ublas::matrix cD; unsigned int size1 = 841; unsigned int size2 = 772; cA.resize(size1,size2); cx.resize(size2); cy.resize(size1); for(unsigned int i=0; i(std::rand()/RAND_MAX); } } for(unsigned int i=0; i(std::rand()/RAND_MAX); } for(unsigned int i=0; i(std::rand()/RAND_MAX); } // std::cout << "Running tests for matrix of size " << cA.size1() << "," << cA.size2() << std::endl; viennacl::matrix A (size1, size2); viennacl::matrix B (size1, size2); viennacl::matrix C (size1, size2); viennacl::matrix D (size1, size2); viennacl::vector x(size2); viennacl::vector y(size1); cB = cA; cC = cA; cD = cA; viennacl::copy(cA,A); viennacl::copy(cB,B); viennacl::copy(cC,C); viennacl::copy(cD,D); viennacl::copy(cx,x); viennacl::copy(cy,y); // -------------------------------------------------------------------------- { std::cout << "y = A*x..." << std::endl; cy = ublas::prod(cA,cx); viennacl::scheduler::statement statement(y, viennacl::op_assign(), viennacl::linalg::prod(A,x)); generator::generate_enqueue_statement(statement, statement.array()[0]); viennacl::backend::finish(); CHECK_RESULT(cy,y,y=A*x) } { std::cout << "x = trans(A)*y..." << std::endl; cx = ublas::prod(trans(cA),cy); viennacl::scheduler::statement statement(x, viennacl::op_assign(), viennacl::linalg::prod(trans(A),y)); generator::generate_enqueue_statement(statement, statement.array()[0]); viennacl::backend::finish(); CHECK_RESULT(cx,x,x=trans(A)*y) } // { // std::cout << "y = reduce_rows(A)..." 
<< std::endl; // for(unsigned int i = 0 ; i < size1 ; ++i){ // NumericT current_max = -INFINITY; // for(unsigned int j = 0 ; j < size2 ; ++j){ // current_max = std::max(current_max,cA(i,j)); // } // cy(i) = current_max; // } // generator::custom_operation op; // op.add(dv_t(y) = generator::reduce_rows(dm_t(A))); // op.execute(); // viennacl::backend::finish(); // CHECK_RESULT(cy,y,y = reduce_rows(A)) // } // { // std::cout << "x = reduce_cols(A)..." << std::endl; // for(unsigned int j = 0 ; j < size2 ; ++j){ // NumericT current_max = -INFINITY; // for(unsigned int i = 0 ; i < size1 ; ++i){ // current_max = std::max(current_max,cA(i,j)); // } // cx(j) = current_max; // } // generator::custom_operation op; // op.add(dv_t(x) = generator::reduce_cols(dm_t(A))); // op.execute(); // viennacl::backend::finish(); // CHECK_RESULT(cx,x,x = reduce_cols(A)) // } return retval; } int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Generated BLAS2" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { double epsilon = 1.0E-4; std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: float" << std::endl; std::cout << " --------------" << std::endl; std::cout << " Row-Major" << std::endl; std::cout << " --------------" << std::endl; retval = test (epsilon); std::cout << " --------------" << std::endl; std::cout << " Column-Major" << std::endl; std::cout << " --------------" << std::endl; retval &= test (epsilon); if ( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << 
std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { double epsilon = 1.0E-4; std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: double" << std::endl; std::cout << " --------------" << std::endl; std::cout << " Row-Major" << std::endl; std::cout << " --------------" << std::endl; retval = test (epsilon); std::cout << " --------------" << std::endl; std::cout << " Column-Major" << std::endl; std::cout << " --------------" << std::endl; retval &= test (epsilon); if ( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } } ViennaCL-1.5.1-src/tests/src/blas3_prod_double.cu000644 001750 001750 00000004367 12267307531 021644 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include "blas3_prod_float_double.hpp" // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: BLAS 3 routines" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-11; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/external_1.cu000644 001750 001750 00000005323 12267307531 020315 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. 
Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // A check for the absence of external linkage (otherwise, library is not truly 'header-only') // //#define VIENNACL_WITH_EIGEN #define VIENNACL_WITH_UBLAS // // *** System // #include // // *** ViennaCL // #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/compressed_matrix.hpp" #include "viennacl/coordinate_matrix.hpp" #include "viennacl/ell_matrix.hpp" #include "viennacl/hyb_matrix.hpp" #ifdef VIENNACL_WITH_OPENCL #include "viennacl/circulant_matrix.hpp" #include "viennacl/hankel_matrix.hpp" #include "viennacl/toeplitz_matrix.hpp" #include "viennacl/vandermonde_matrix.hpp" #endif #include "viennacl/linalg/ilu.hpp" #include "viennacl/linalg/row_scaling.hpp" #include "viennacl/linalg/jacobi_precond.hpp" #include "viennacl/linalg/cg.hpp" #include "viennacl/linalg/bicgstab.hpp" #include "viennacl/linalg/gmres.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/qr.hpp" #include "viennacl/misc/bandwidth_reduction.hpp" #ifdef VIENNACL_WITH_OPENCL #include "viennacl/linalg/amg.hpp" #include "viennacl/linalg/spai.hpp" #include "viennacl/linalg/svd.hpp" #include "viennacl/fft.hpp" #include "viennacl/generator/generate.hpp" #endif #include "viennacl/io/matrix_market.hpp" #include "viennacl/scheduler/execute.hpp" //defined in external_2.cpp void other_func(); // // ------------------------------------------------------------- // int main() { typedef float NumericType; //doing nothing but instantiating a few types viennacl::scalar s; viennacl::vector v(10); viennacl::matrix m(10, 10); 
viennacl::compressed_matrix compr(10, 10); viennacl::coordinate_matrix coord(10, 10); //this is the external linkage check: other_func(); std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/vector_double.cpp000644 001750 001750 00000004275 12267307531 021267 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include "vector_float_double.hpp" // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Vector" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-10; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) 
std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/matrix_col_double.cu000644 001750 001750 00000003447 12267307531 021753 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include "matrix_float_double.hpp" int main (int, const char **) { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Matrix operations, column-major, double precision " << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { double epsilon = 1e-12; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; if (run_test(epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return EXIT_SUCCESS; } 
ViennaCL-1.5.1-src/tests/src/scheduler_matrix.cpp000644 001750 001750 00000075665 12267307531 022010 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #define VIENNACL_WITH_UBLAS //#define NDEBUG //#define VIENNACL_BUILD_INFO #include #include #include #include #include #include #include #include //#include "../benchmarks/benchmark-utils.hpp" #include "viennacl/scalar.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/prod.hpp" /*#include "viennacl/compressed_matrix.hpp" #include "viennacl/linalg/cg.hpp" #include "viennacl/linalg/inner_prod.hpp" #include "viennacl/linalg/ilu.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/io/matrix_market.hpp"*/ #include "viennacl/matrix_proxy.hpp" #include "viennacl/vector_proxy.hpp" #include "boost/numeric/ublas/vector.hpp" #include "boost/numeric/ublas/matrix.hpp" #include "boost/numeric/ublas/matrix_proxy.hpp" #include "boost/numeric/ublas/vector_proxy.hpp" #include "boost/numeric/ublas/io.hpp" #include "viennacl/scheduler/execute.hpp" using namespace boost::numeric; template bool check_for_equality(MatrixType const & ublas_A, VCLMatrixType const & vcl_A, double epsilon) { typedef typename MatrixType::value_type value_type; boost::numeric::ublas::matrix vcl_A_cpu(vcl_A.size1(), vcl_A.size2()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(vcl_A, vcl_A_cpu); for (std::size_t 
i=0; i epsilon) || (vcl_A_cpu(i,j) != vcl_A_cpu(i,j)) ) { std::cout << "Error at index (" << i << ", " << j << "): " << ublas_A(i,j) << " vs " << vcl_A_cpu(i,j) << std::endl; std::cout << std::endl << "TEST failed!" << std::endl; return false; } } } } std::cout << "PASSED!" << std::endl; return true; } template int run_test(double epsilon, UBLASMatrixType & ublas_A, UBLASMatrixType & ublas_B, UBLASMatrixType & ublas_C, ViennaCLMatrixType1 & vcl_A, ViennaCLMatrixType2 & vcl_B, ViennaCLMatrixType3 vcl_C) { typedef typename viennacl::result_of::cpu_value_type::type cpu_value_type; cpu_value_type alpha = cpu_value_type(3.1415); viennacl::scalar gpu_alpha = alpha; cpu_value_type beta = cpu_value_type(2.7182); viennacl::scalar gpu_beta = beta; // // Initializer: // std::cout << "Checking for zero_matrix initializer..." << std::endl; ublas_A = ublas::zero_matrix(ublas_A.size1(), ublas_A.size2()); vcl_A = viennacl::zero_matrix(vcl_A.size1(), vcl_A.size2()); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; std::cout << "Checking for scalar_matrix initializer..." << std::endl; ublas_A = ublas::scalar_matrix(ublas_A.size1(), ublas_A.size2(), alpha); vcl_A = viennacl::scalar_matrix(vcl_A.size1(), vcl_A.size2(), alpha); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; ublas_A = ublas::scalar_matrix(ublas_A.size1(), ublas_A.size2(), gpu_beta); vcl_A = viennacl::scalar_matrix( vcl_A.size1(), vcl_A.size2(), gpu_beta); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; /*std::cout << "Checking for identity initializer..." 
<< std::endl; ublas_A = ublas::identity_matrix(ublas_A.size1()); vcl_A = viennacl::identity_matrix(vcl_A.size1()); if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE;*/ std::cout << std::endl; //std::cout << "//" << std::endl; //std::cout << "////////// Test: Assignments //////////" << std::endl; //std::cout << "//" << std::endl; if (!check_for_equality(ublas_B, vcl_B, epsilon)) return EXIT_FAILURE; std::cout << "Testing matrix assignment... "; //std::cout << ublas_B(0,0) << " vs. " << vcl_B(0,0) << std::endl; ublas_A = ublas_B; vcl_A = vcl_B; if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; //std::cout << std::endl; //std::cout << "//" << std::endl; //std::cout << "////////// Test 1: Copy to GPU //////////" << std::endl; //std::cout << "//" << std::endl; ublas_A = ublas_B; viennacl::copy(ublas_B, vcl_A); std::cout << "Testing upper left copy to GPU... "; if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; ublas_C = ublas_B; viennacl::copy(ublas_B, vcl_C); std::cout << "Testing lower right copy to GPU... "; if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; //std::cout << std::endl; //std::cout << "//" << std::endl; //std::cout << "////////// Test 2: Copy from GPU //////////" << std::endl; //std::cout << "//" << std::endl; std::cout << "Testing upper left copy to A... "; if (!check_for_equality(ublas_A, vcl_A, epsilon)) return EXIT_FAILURE; std::cout << "Testing lower right copy to C... 
"; if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; //std::cout << "//" << std::endl; //std::cout << "////////// Test 3: Addition //////////" << std::endl; //std::cout << "//" << std::endl; viennacl::copy(ublas_C, vcl_C); std::cout << "Assignment: "; { ublas_C = ublas_B; viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), vcl_B); // same as vcl_C = vcl_B; viennacl::scheduler::execute(my_statement); if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; } std::cout << "Inplace add: "; { ublas_C += ublas_C; viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_add(), vcl_C); // same as vcl_C += vcl_C; viennacl::scheduler::execute(my_statement); if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; } std::cout << "Inplace sub: "; { ublas_C -= ublas_C; viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_sub(), vcl_C); // same as vcl_C -= vcl_C; viennacl::scheduler::execute(my_statement); if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; } std::cout << "Add: "; { ublas_C = ublas_A + ublas_B; viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), vcl_A + vcl_B); // same as vcl_C = vcl_A + vcl_B; viennacl::scheduler::execute(my_statement); if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; } std::cout << "Sub: "; { ublas_C = ublas_A - ublas_B; viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), vcl_A - vcl_B); // same as vcl_C = vcl_A - vcl_B; viennacl::scheduler::execute(my_statement); if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; } std::cout << "Composite assignments: "; { ublas_C += alpha * ublas_A - beta * ublas_B + ublas_A / beta - ublas_B / alpha; viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_add(), alpha * vcl_A - beta * vcl_B + vcl_A / beta - vcl_B / alpha); // same as vcl_C += alpha * vcl_A - beta * vcl_B + vcl_A / beta 
- vcl_B / alpha; viennacl::scheduler::execute(my_statement); if (!check_for_equality(ublas_C, vcl_C, epsilon)) return EXIT_FAILURE; } std::cout << "--- Testing elementwise operations (binary) ---" << std::endl; std::cout << "x = element_prod(x, y)... "; { ublas_C = element_prod(ublas_A, ublas_B); viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), viennacl::linalg::element_prod(vcl_A, vcl_B)); viennacl::scheduler::execute(my_statement); if (!check_for_equality(ublas_C, vcl_C, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_prod(x + y, y)... "; { ublas_C = element_prod(ublas_A + ublas_B, ublas_B); viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), viennacl::linalg::element_prod(vcl_A + vcl_B, vcl_B)); viennacl::scheduler::execute(my_statement); if (!check_for_equality(ublas_C, vcl_C, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_prod(x, x + y)... "; { ublas_C = element_prod(ublas_A, ublas_A + ublas_B); viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), viennacl::linalg::element_prod(vcl_A, vcl_B + vcl_A)); viennacl::scheduler::execute(my_statement); if (!check_for_equality(ublas_C, vcl_C, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_prod(x - y, y + x)... "; { ublas_C = element_prod(ublas_A - ublas_B, ublas_B + ublas_A); viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), viennacl::linalg::element_prod(vcl_A - vcl_B, vcl_B + vcl_A)); viennacl::scheduler::execute(my_statement); if (!check_for_equality(ublas_C, vcl_C, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_div(x, y)... 
"; { ublas_C = element_div(ublas_A, ublas_B); viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), viennacl::linalg::element_div(vcl_A, vcl_B)); viennacl::scheduler::execute(my_statement); if (!check_for_equality(ublas_C, vcl_C, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_div(x + y, y)... "; { ublas_C = element_div(ublas_A + ublas_B, ublas_B); viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), viennacl::linalg::element_div(vcl_A + vcl_B, vcl_B)); viennacl::scheduler::execute(my_statement); if (!check_for_equality(ublas_C, vcl_C, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_div(x, x + y)... "; { ublas_C = element_div(ublas_A, ublas_A + ublas_B); viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), viennacl::linalg::element_div(vcl_A, vcl_B + vcl_A)); viennacl::scheduler::execute(my_statement); if (!check_for_equality(ublas_C, vcl_C, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_div(x - y, y + x)... 
"; { ublas_C = element_div(ublas_A - ublas_B, ublas_B + ublas_A); viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), viennacl::linalg::element_div(vcl_A - vcl_B, vcl_B + vcl_A)); viennacl::scheduler::execute(my_statement); if (!check_for_equality(ublas_C, vcl_C, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "--- Testing elementwise operations (unary) ---" << std::endl; #define GENERATE_UNARY_OP_TEST(OPNAME) \ ublas_A = ublas::scalar_matrix(ublas_A.size1(), ublas_A.size2(), cpu_value_type(0.21)); \ ublas_B = cpu_value_type(3.1415) * ublas_A; \ viennacl::copy(ublas_A, vcl_A); \ viennacl::copy(ublas_B, vcl_B); \ { \ for (std::size_t i=0; i int run_test(double epsilon) { //typedef float ScalarType; typedef boost::numeric::ublas::matrix MatrixType; typedef viennacl::matrix VCLMatrixType; std::size_t dim_rows = 131; std::size_t dim_cols = 33; //std::size_t dim_rows = 5; //std::size_t dim_cols = 3; //setup ublas objects: MatrixType ublas_A(dim_rows, dim_cols); MatrixType ublas_B(dim_rows, dim_cols); MatrixType ublas_C(dim_rows, dim_cols); for (std::size_t i=0; i vcl_range_A(vcl_A_full, vcl_A_r1, vcl_A_r2); viennacl::slice vcl_A_s1(2, 3, dim_rows); viennacl::slice vcl_A_s2(2 * dim_cols, 2, dim_cols); viennacl::matrix_slice vcl_slice_A(vcl_A_full, vcl_A_s1, vcl_A_s2); // // Create B // VCLMatrixType vcl_B(dim_rows, dim_cols); viennacl::range vcl_B_r1(dim_rows, 2 * dim_rows); viennacl::range vcl_B_r2(2 * dim_cols, 3 * dim_cols); viennacl::matrix_range vcl_range_B(vcl_B_full, vcl_B_r1, vcl_B_r2); viennacl::slice vcl_B_s1(2 * dim_rows, 2, dim_rows); viennacl::slice vcl_B_s2(dim_cols, 3, dim_cols); viennacl::matrix_slice vcl_slice_B(vcl_B_full, vcl_B_s1, vcl_B_s2); // // Create C // VCLMatrixType vcl_C(dim_rows, dim_cols); viennacl::range vcl_C_r1(2 * dim_rows, 3 * dim_rows); viennacl::range vcl_C_r2(3 * dim_cols, 4 * dim_cols); viennacl::matrix_range vcl_range_C(vcl_C_full, vcl_C_r1, vcl_C_r2); viennacl::slice vcl_C_s1(dim_rows, 2, 
dim_rows); viennacl::slice vcl_C_s2(0, 3, dim_cols); viennacl::matrix_slice vcl_slice_C(vcl_C_full, vcl_C_s1, vcl_C_s2); viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_C); viennacl::copy(ublas_C, vcl_range_C); viennacl::copy(ublas_C, vcl_slice_C); std::cout << std::endl; std::cout << "//" << std::endl; std::cout << "////////// Test: Copy CTOR //////////" << std::endl; std::cout << "//" << std::endl; { std::cout << "Testing matrix created from range... "; VCLMatrixType vcl_temp = vcl_range_A; if (check_for_equality(ublas_A, vcl_temp, epsilon)) std::cout << "PASSED!" << std::endl; else { std::cout << "ublas_A: " << ublas_A << std::endl; std::cout << "vcl_temp: " << vcl_temp << std::endl; std::cout << "vcl_range_A: " << vcl_range_A << std::endl; std::cout << "vcl_A: " << vcl_A << std::endl; std::cout << std::endl << "TEST failed!" << std::endl; return EXIT_FAILURE; } std::cout << "Testing matrix created from slice... "; VCLMatrixType vcl_temp2 = vcl_range_B; if (check_for_equality(ublas_B, vcl_temp2, epsilon)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!" 
<< std::endl; return EXIT_FAILURE; } } std::cout << "//" << std::endl; std::cout << "////////// Test: Initializer for matrix type //////////" << std::endl; std::cout << "//" << std::endl; { ublas::matrix ublas_dummy1 = ublas::identity_matrix(ublas_A.size1()); ublas::matrix ublas_dummy2 = ublas::scalar_matrix(ublas_A.size1(), ublas_A.size1(), 3.0); ublas::matrix ublas_dummy3 = ublas::zero_matrix(ublas_A.size1(), ublas_A.size1()); viennacl::matrix vcl_dummy1 = viennacl::identity_matrix(ublas_A.size1()); viennacl::matrix vcl_dummy2 = viennacl::scalar_matrix(ublas_A.size1(), ublas_A.size1(), 3.0); viennacl::matrix vcl_dummy3 = viennacl::zero_matrix(ublas_A.size1(), ublas_A.size1()); std::cout << "Testing initializer CTOR... "; if ( check_for_equality(ublas_dummy1, vcl_dummy1, epsilon) && check_for_equality(ublas_dummy2, vcl_dummy2, epsilon) && check_for_equality(ublas_dummy3, vcl_dummy3, epsilon) ) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!" << std::endl; return EXIT_FAILURE; } ublas_dummy1 = ublas::zero_matrix(ublas_A.size1(), ublas_A.size1()); ublas_dummy2 = ublas::identity_matrix(ublas_A.size1()); ublas_dummy3 = ublas::scalar_matrix(ublas_A.size1(), ublas_A.size1(), 3.0); vcl_dummy1 = viennacl::zero_matrix(ublas_A.size1(), ublas_A.size1()); vcl_dummy2 = viennacl::identity_matrix(ublas_A.size1()); vcl_dummy3 = viennacl::scalar_matrix(ublas_A.size1(), ublas_A.size1(), 3.0); std::cout << "Testing initializer assignment... "; if ( check_for_equality(ublas_dummy1, vcl_dummy1, epsilon) && check_for_equality(ublas_dummy2, vcl_dummy2, epsilon) && check_for_equality(ublas_dummy3, vcl_dummy3, epsilon) ) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!" << std::endl; return EXIT_FAILURE; } } // // run operation tests: // /////// A=matrix: std::cout << "Testing A=matrix, B=matrix, C=matrix ..." 
<< std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_A, vcl_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=matrix, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_A, vcl_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=matrix, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_A, vcl_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=range, C=matrix ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_A, vcl_range_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=range, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_A, vcl_range_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=range, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_A, vcl_range_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=slice, C=matrix ..." 
<< std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_A, vcl_slice_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=slice, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_A, vcl_slice_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=slice, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_A, vcl_slice_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } /////// A=range: std::cout << "Testing A=range, B=matrix, C=matrix ..." << std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=range, B=matrix, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=range, B=matrix, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=range, B=range, C=matrix ..." 
<< std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_range_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=range, B=range, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_range_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=range, B=range, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_range_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=range, B=slice, C=matrix ..." << std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_slice_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=range, B=slice, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_slice_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=range, B=slice, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_slice_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } /////// A=slice: std::cout << "Testing A=slice, B=matrix, C=matrix ..." 
<< std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=matrix, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=matrix, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=range, C=matrix ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_range_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=range, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_range_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=range, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_range_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=slice, C=matrix ..." 
<< std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_slice_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=slice, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_slice_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=slice, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(epsilon, ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_slice_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } return EXIT_SUCCESS; } int main (int, const char **) { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Matrix Range" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; double epsilon = 1e-4; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; std::cout << " --- row-major ---" << std::endl; if (run_test(epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " --- column-major ---" << std::endl; if (run_test(epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { double epsilon = 1e-12; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; if (run_test(epsilon) != 
EXIT_SUCCESS) return EXIT_FAILURE; if (run_test(epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/vector_float.cpp000644 001750 001750 00000004116 12267307531 021114 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include "vector_float_double.hpp" // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Vector" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = static_cast(1.0E-2); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << 
"----------------------------------------------" << std::endl; std::cout << std::endl; std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/matrix_vector.cpp000644 001750 001750 00000123423 12267307531 021316 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // *** System // #include // // *** Boost // #include #include #include #include #include #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL #define VIENNACL_WITH_UBLAS 1 #include "viennacl/scalar.hpp" #include "viennacl/matrix.hpp" #include "viennacl/vector.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/lu.hpp" #include "examples/tutorial/Random.hpp" // // ------------------------------------------------------------- // using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType & s1, viennacl::scalar & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } template ScalarType diff(ublas::vector const & v1, VCLVectorType const & v2) { ublas::vector v2_cpu(v2.size()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) 
viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin()); for (unsigned int i=0;i 0 ) v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); else v2_cpu[i] = 0.0; } return norm_inf(v2_cpu); } template ScalarType diff(ublas::matrix const & mat1, VCLMatrixType const & mat2) { ublas::matrix mat2_cpu(mat2.size1(), mat2.size2()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(mat2, mat2_cpu); ScalarType ret = 0; ScalarType act = 0; for (unsigned int i = 0; i < mat2_cpu.size1(); ++i) { for (unsigned int j = 0; j < mat2_cpu.size2(); ++j) { act = std::fabs(mat2_cpu(i,j) - mat1(i,j)) / std::max( std::fabs(mat2_cpu(i, j)), std::fabs(mat1(i,j)) ); if (act > ret) ret = act; } } //std::cout << ret << std::endl; return ret; } // // ------------------------------------------------------------- // template int test_prod_rank1(Epsilon const & epsilon, UblasMatrixType & ublas_m1, UblasVectorType & ublas_v1, UblasVectorType & ublas_v2, UblasMatrixType & ublas_m2, VCLMatrixType & vcl_m1, VCLVectorType1 & vcl_v1, VCLVectorType2 & vcl_v2, VCLMatrixType & vcl_m2) { int retval = EXIT_SUCCESS; // sync data: ublas_v1 = ublas::scalar_vector(ublas_v1.size(), NumericT(0.1234)); ublas_v2 = ublas::scalar_vector(ublas_v2.size(), NumericT(0.4321)); viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); viennacl::copy(ublas_m1, vcl_m1); // -------------------------------------------------------------------------- std::cout << "Rank 1 update" << std::endl; ublas_m1 += ublas::outer_prod(ublas_v1, ublas_v2); vcl_m1 += viennacl::linalg::outer_prod(vcl_v1, vcl_v2); if( std::fabs(diff(ublas_m1, vcl_m1)) > epsilon ) { std::cout << "# Error at operation: rank 1 update" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_m1, vcl_m1)) << std::endl; return EXIT_FAILURE; } // 
-------------------------------------------------------------------------- std::cout << "Scaled rank 1 update - CPU Scalar" << std::endl; ublas_m1 += NumericT(4.2) * ublas::outer_prod(ublas_v1, ublas_v2); vcl_m1 += NumericT(2.1) * viennacl::linalg::outer_prod(vcl_v1, vcl_v2); vcl_m1 += viennacl::linalg::outer_prod(vcl_v1, vcl_v2) * NumericT(2.1); //check proper compilation if( std::fabs(diff(ublas_m1, vcl_m1)) > epsilon ) { std::cout << "# Error at operation: scaled rank 1 update - CPU Scalar" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_m1, vcl_m1)) << std::endl; return EXIT_FAILURE; } // -------------------------------------------------------------------------- std::cout << "Scaled rank 1 update - GPU Scalar" << std::endl; ublas_m1 += NumericT(4.2) * ublas::outer_prod(ublas_v1, ublas_v2); vcl_m1 += viennacl::scalar(NumericT(2.1)) * viennacl::linalg::outer_prod(vcl_v1, vcl_v2); vcl_m1 += viennacl::linalg::outer_prod(vcl_v1, vcl_v2) * viennacl::scalar(NumericT(2.1)); //check proper compilation if( std::fabs(diff(ublas_m1, vcl_m1)) > epsilon ) { std::cout << "# Error at operation: scaled rank 1 update - GPU Scalar" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_m1, vcl_m1)) << std::endl; return EXIT_FAILURE; } //reset vcl_matrix: viennacl::copy(ublas_m1, vcl_m1); // -------------------------------------------------------------------------- std::cout << "Matrix-Vector product" << std::endl; ublas_v1 = viennacl::linalg::prod(ublas_m1, ublas_v2); vcl_v1 = viennacl::linalg::prod(vcl_m1, vcl_v2); if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- std::cout << "Matrix-Vector product with scaled add" << std::endl; NumericT alpha = static_cast(2.786); NumericT beta = static_cast(1.432); 
viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * viennacl::linalg::prod(ublas_m1, ublas_v2) + beta * ublas_v1; vcl_v1 = alpha * viennacl::linalg::prod(vcl_m1, vcl_v2) + beta * vcl_v1; if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Transposed Matrix-Vector product" << std::endl; ublas_v2 = alpha * viennacl::linalg::prod(trans(ublas_m1), ublas_v1); vcl_v2 = alpha * viennacl::linalg::prod(trans(vcl_m1), vcl_v1); if( std::fabs(diff(ublas_v2, vcl_v2)) > epsilon ) { std::cout << "# Error at operation: transposed matrix-vector product" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v2, vcl_v2)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Transposed Matrix-Vector product with scaled add" << std::endl; ublas_v2 = alpha * viennacl::linalg::prod(trans(ublas_m1), ublas_v1) + beta * ublas_v2; vcl_v2 = alpha * viennacl::linalg::prod(trans(vcl_m1), vcl_v1) + beta * vcl_v2; if( std::fabs(diff(ublas_v2, vcl_v2)) > epsilon ) { std::cout << "# Error at operation: transposed matrix-vector product with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v2, vcl_v2)) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Row extraction from matrix" << std::endl; ublas_v2 = row(ublas_m1, std::size_t(7)); vcl_v2 = 
row(vcl_m1, std::size_t(7)); if( std::fabs(diff(ublas_v2, vcl_v2)) > epsilon ) { std::cout << "# Error at operation: diagonal extraction from matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v2, vcl_v2)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Column extraction from matrix" << std::endl; ublas_v1 = column(ublas_m1, std::size_t(7)); vcl_v1 = column(vcl_m1, std::size_t(7)); if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: diagonal extraction from matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v2, vcl_v2)) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); viennacl::copy(ublas_m2, vcl_m2); UblasMatrixType A = ublas_m2; std::cout << "Diagonal extraction from matrix" << std::endl; for (std::size_t i=0; i(-3)); if( std::fabs(diff(ublas_v2, vcl_v2)) > epsilon ) { std::cout << "# Error at operation: diagonal extraction from matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v2, vcl_v2)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Matrix diagonal assignment from vector" << std::endl; A = ublas::scalar_matrix(A.size1(), A.size2(), NumericT(0)); for (std::size_t i=0; i(ublas_m1.size2()) - static_cast(A.size1())); if( std::fabs(diff(A, vcl_m2)) > epsilon ) { std::cout << "# Error at operation: Matrix assignment from diagonal" << std::endl; std::cout << " diff: " << std::fabs(diff(A, vcl_m2)) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- return retval; } template int test_solve(Epsilon const & epsilon, UblasMatrixType & ublas_m1, UblasVectorType & ublas_v1, VCLMatrixType & vcl_m1, VCLVectorType1 & vcl_v1) { int retval = EXIT_SUCCESS; // sync data: //viennacl::copy(ublas_v1.begin(), 
ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v1, vcl_v1); viennacl::copy(ublas_m1, vcl_m1); /////////////////// test direct solvers //////////////////////////// //upper triangular: std::cout << "Upper triangular solver" << std::endl; ublas_v1 = ublas::solve(ublas_m1, ublas_v1, ublas::upper_tag()); vcl_v1 = viennacl::linalg::solve(vcl_m1, vcl_v1, viennacl::linalg::upper_tag()); if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: upper triangular solver" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } //upper unit triangular: std::cout << "Upper unit triangular solver" << std::endl; viennacl::copy(ublas_v1, vcl_v1); ublas_v1 = ublas::solve(ublas_m1, ublas_v1, ublas::unit_upper_tag()); vcl_v1 = viennacl::linalg::solve(vcl_m1, vcl_v1, viennacl::linalg::unit_upper_tag()); if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: unit upper triangular solver" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } //lower triangular: std::cout << "Lower triangular solver" << std::endl; viennacl::copy(ublas_v1, vcl_v1); ublas_v1 = ublas::solve(ublas_m1, ublas_v1, ublas::lower_tag()); vcl_v1 = viennacl::linalg::solve(vcl_m1, vcl_v1, viennacl::linalg::lower_tag()); if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: lower triangular solver" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } //lower unit triangular: std::cout << "Lower unit triangular solver" << std::endl; viennacl::copy(ublas_v1, vcl_v1); ublas_v1 = ublas::solve(ublas_m1, ublas_v1, ublas::unit_lower_tag()); vcl_v1 = viennacl::linalg::solve(vcl_m1, vcl_v1, viennacl::linalg::unit_lower_tag()); if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: unit lower triangular solver" << 
std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } //transposed upper triangular: std::cout << "Transposed upper triangular solver" << std::endl; viennacl::copy(ublas_v1, vcl_v1); ublas_v1 = ublas::solve(trans(ublas_m1), ublas_v1, ublas::upper_tag()); vcl_v1 = viennacl::linalg::solve(trans(vcl_m1), vcl_v1, viennacl::linalg::upper_tag()); if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: upper triangular solver" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } //transposed upper unit triangular: std::cout << "Transposed unit upper triangular solver" << std::endl; viennacl::copy(ublas_v1, vcl_v1); ublas_v1 = ublas::solve(trans(ublas_m1), ublas_v1, ublas::unit_upper_tag()); vcl_v1 = viennacl::linalg::solve(trans(vcl_m1), vcl_v1, viennacl::linalg::unit_upper_tag()); if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: unit upper triangular solver" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } //transposed lower triangular: std::cout << "Transposed lower triangular solver" << std::endl; viennacl::copy(ublas_v1, vcl_v1); ublas_v1 = ublas::solve(trans(ublas_m1), ublas_v1, ublas::lower_tag()); vcl_v1 = viennacl::linalg::solve(trans(vcl_m1), vcl_v1, viennacl::linalg::lower_tag()); if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: lower triangular solver" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } //transposed lower unit triangular: std::cout << "Transposed unit lower triangular solver" << std::endl; viennacl::copy(ublas_v1, vcl_v1); ublas_v1 = ublas::solve(trans(ublas_m1), ublas_v1, ublas::unit_lower_tag()); vcl_v1 = viennacl::linalg::solve(trans(vcl_m1), vcl_v1, viennacl::linalg::unit_lower_tag()); if( 
std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: unit lower triangular solver" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } return retval; } // // ------------------------------------------------------------- // template< typename NumericT, typename F, typename Epsilon > int test(Epsilon const& epsilon) { int retval = EXIT_SUCCESS; std::size_t num_rows = 141; //note: use num_rows > num_cols + 3 for diag() tests to work std::size_t num_cols = 103; // -------------------------------------------------------------------------- ublas::vector ublas_v1(num_rows); for (std::size_t i = 0; i < ublas_v1.size(); ++i) ublas_v1(i) = random(); ublas::vector ublas_v2 = ublas::scalar_vector(num_cols, NumericT(3.1415)); ublas::matrix ublas_m1(ublas_v1.size(), ublas_v2.size()); for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = static_cast(0.1) * random(); ublas::matrix ublas_m2(ublas_v1.size(), ublas_v1.size()); for (std::size_t i = 0; i < ublas_m2.size1(); ++i) { for (std::size_t j = 0; j < ublas_m2.size2(); ++j) ublas_m2(i,j) = static_cast(-0.1) * random(); ublas_m2(i, i) = static_cast(2) + random(); } viennacl::vector vcl_v1_native(ublas_v1.size()); viennacl::vector vcl_v1_large(4 * ublas_v1.size()); viennacl::vector_range< viennacl::vector > vcl_v1_range(vcl_v1_large, viennacl::range(3, ublas_v1.size() + 3)); viennacl::vector_slice< viennacl::vector > vcl_v1_slice(vcl_v1_large, viennacl::slice(2, 3, ublas_v1.size())); viennacl::vector vcl_v2_native(ublas_v2.size()); viennacl::vector vcl_v2_large(4 * ublas_v2.size()); viennacl::vector_range< viennacl::vector > vcl_v2_range(vcl_v2_large, viennacl::range(8, ublas_v2.size() + 8)); viennacl::vector_slice< viennacl::vector > vcl_v2_slice(vcl_v2_large, viennacl::slice(6, 2, ublas_v2.size())); viennacl::matrix vcl_m1_native(ublas_m1.size1(), ublas_m1.size2()); 
viennacl::matrix vcl_m1_large(4 * ublas_m1.size1(), 4 * ublas_m1.size2()); viennacl::matrix_range< viennacl::matrix > vcl_m1_range(vcl_m1_large, viennacl::range(8, ublas_m1.size1() + 8), viennacl::range(ublas_m1.size2(), 2 * ublas_m1.size2()) ); viennacl::matrix_slice< viennacl::matrix > vcl_m1_slice(vcl_m1_large, viennacl::slice(6, 2, ublas_m1.size1()), viennacl::slice(ublas_m1.size2(), 2, ublas_m1.size2()) ); viennacl::matrix vcl_m2_native(ublas_m2.size1(), ublas_m2.size2()); viennacl::matrix vcl_m2_large(4 * ublas_m2.size1(), 4 * ublas_m2.size2()); viennacl::matrix_range< viennacl::matrix > vcl_m2_range(vcl_m2_large, viennacl::range(8, ublas_m2.size1() + 8), viennacl::range(ublas_m2.size2(), 2 * ublas_m2.size2()) ); viennacl::matrix_slice< viennacl::matrix > vcl_m2_slice(vcl_m2_large, viennacl::slice(6, 2, ublas_m2.size1()), viennacl::slice(ublas_m2.size2(), 2, ublas_m2.size2()) ); /* std::cout << "Matrix resizing (to larger)" << std::endl; matrix.resize(2*num_rows, 2*num_cols, true); for (unsigned int i = 0; i < matrix.size1(); ++i) { for (unsigned int j = (i epsilon ) { std::cout << "# Error at operation: matrix resize (to larger)" << std::endl; std::cout << " diff: " << std::fabs(diff(matrix, vcl_matrix)) << std::endl; return EXIT_FAILURE; } matrix(12, 14) = NumericT(1.9); matrix(19, 16) = NumericT(1.0); matrix (13, 15) = NumericT(-9); vcl_matrix(12, 14) = NumericT(1.9); vcl_matrix(19, 16) = NumericT(1.0); vcl_matrix (13, 15) = NumericT(-9); std::cout << "Matrix resizing (to smaller)" << std::endl; matrix.resize(result.size(), rhs.size(), true); vcl_matrix.resize(result.size(), rhs.size(), true); if( std::fabs(diff(matrix, vcl_matrix)) > epsilon ) { std::cout << "# Error at operation: matrix resize (to smaller)" << std::endl; std::cout << " diff: " << std::fabs(diff(matrix, vcl_matrix)) << std::endl; return EXIT_FAILURE; } */ // // Run a bunch of tests for rank-1-updates, matrix-vector products // std::cout << "------------ Testing rank-1-updates and 
matrix-vector products ------------------" << std::endl; std::cout << "* m = full, v1 = full, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_native, vcl_v1_native, vcl_v2_native, vcl_m2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = full, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_native, vcl_v1_native, vcl_v2_range, vcl_m2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = full, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_native, vcl_v1_native, vcl_v2_slice, vcl_m2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; // v1 = range std::cout << "* m = full, v1 = range, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_native, vcl_v1_range, vcl_v2_native, vcl_m2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = range, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_native, vcl_v1_range, vcl_v2_range, vcl_m2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = range, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_native, vcl_v1_range, vcl_v2_slice, vcl_m2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; // v1 = slice std::cout << "* m = full, v1 = slice, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_native, vcl_v1_slice, vcl_v2_native, vcl_m2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = slice, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_native, vcl_v1_slice, vcl_v2_range, vcl_m2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = slice, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_native, vcl_v1_slice, vcl_v2_slice, vcl_m2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; ///////////////////////////// matrix_range std::cout << "* m = range, v1 = full, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_range, vcl_v1_native, vcl_v2_native, vcl_m2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = full, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_range, vcl_v1_native, vcl_v2_range, vcl_m2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = full, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_range, vcl_v1_native, vcl_v2_slice, vcl_m2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; // v1 = range std::cout << "* m = range, v1 = range, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_range, vcl_v1_range, vcl_v2_native, vcl_m2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = range, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_range, vcl_v1_range, vcl_v2_range, vcl_m2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = range, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_range, vcl_v1_range, vcl_v2_slice, vcl_m2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; // v1 = slice std::cout << "* m = range, v1 = slice, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_range, vcl_v1_slice, vcl_v2_native, vcl_m2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = slice, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_range, vcl_v1_slice, vcl_v2_range, vcl_m2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = slice, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_range, vcl_v1_slice, vcl_v2_slice, vcl_m2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; ///////////////////////////// matrix_slice std::cout << "* m = slice, v1 = full, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_slice, vcl_v1_native, vcl_v2_native, vcl_m2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = full, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_slice, vcl_v1_native, vcl_v2_range, vcl_m2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = full, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_slice, vcl_v1_native, vcl_v2_slice, vcl_m2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; // v1 = range std::cout << "* m = slice, v1 = range, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_slice, vcl_v1_range, vcl_v2_native, vcl_m2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = range, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_slice, vcl_v1_range, vcl_v2_range, vcl_m2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = range, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_slice, vcl_v1_range, vcl_v2_slice, vcl_m2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; // v1 = slice std::cout << "* m = slice, v1 = slice, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_slice, vcl_v1_slice, vcl_v2_native, vcl_m2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = slice, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_slice, vcl_v1_slice, vcl_v2_range, vcl_m2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = slice, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_slice, vcl_v1_slice, vcl_v2_slice, vcl_m2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; // // Testing triangular solve() routines // std::cout << "------------ Testing triangular solves ------------------" << std::endl; std::cout << "* m = full, v1 = full" << std::endl; retval = test_solve(epsilon, ublas_m2, ublas_v1, vcl_m2_native, vcl_v1_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = range" << std::endl; retval = test_solve(epsilon, ublas_m2, ublas_v1, vcl_m2_native, vcl_v1_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = slice" << std::endl; retval = test_solve(epsilon, ublas_m2, ublas_v1, vcl_m2_native, vcl_v1_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; ///////// matrix_range std::cout << "* m = range, v1 = full" << std::endl; retval = test_solve(epsilon, ublas_m2, ublas_v1, vcl_m2_range, vcl_v1_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = range" << std::endl; retval = test_solve(epsilon, ublas_m2, ublas_v1, vcl_m2_range, vcl_v1_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = slice" << std::endl; retval = test_solve(epsilon, ublas_m2, ublas_v1, vcl_m2_range, vcl_v1_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; //////// matrix_slice std::cout << "* m = slice, v1 = full" << std::endl; retval = test_solve(epsilon, ublas_m2, ublas_v1, vcl_m2_slice, vcl_v1_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = range" << std::endl; retval = test_solve(epsilon, ublas_m2, ublas_v1, vcl_m2_slice, vcl_v1_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = slice" << std::endl; retval = test_solve(epsilon, ublas_m2, ublas_v1, vcl_m2_slice, vcl_v1_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; ////////////// Final test for full LU decomposition: //full solver: std::cout << "Full solver" << std::endl; unsigned int lu_dim = 100; ublas::matrix square_matrix(lu_dim, lu_dim); ublas::vector lu_rhs(lu_dim); viennacl::matrix vcl_square_matrix(lu_dim, lu_dim); viennacl::vector vcl_lu_rhs(lu_dim); for (std::size_t i=0; i(0.5) * random(); //put some more weight on diagonal elements: for (std::size_t j=0; j(20.0) + random(); lu_rhs(j) = random(); } viennacl::copy(square_matrix, vcl_square_matrix); viennacl::copy(lu_rhs, vcl_lu_rhs); //ublas:: ublas::lu_factorize(square_matrix); ublas::inplace_solve (square_matrix, lu_rhs, ublas::unit_lower_tag ()); ublas::inplace_solve (square_matrix, lu_rhs, ublas::upper_tag ()); // ViennaCL: viennacl::linalg::lu_factorize(vcl_square_matrix); //viennacl::copy(square_matrix, vcl_square_matrix); viennacl::linalg::lu_substitute(vcl_square_matrix, vcl_lu_rhs); if( std::fabs(diff(lu_rhs, vcl_lu_rhs)) > epsilon ) { std::cout << "# Error at operation: dense solver" << std::endl; std::cout << " diff: " << std::fabs(diff(lu_rhs, vcl_lu_rhs)) << std::endl; retval = EXIT_FAILURE; } return retval; } // // ------------------------------------------------------------- // /* Test driver. Prints a banner, then calls test(epsilon) once per configuration announced on stdout: float/row-major, float/column-major, double/row-major, double/column-major. A run that does not return EXIT_SUCCESS makes main() return that value immediately; otherwise retval is returned at the end. NOTE(review): the test(epsilon) calls carry no template arguments in this text, so the layout printed for each run is not visibly passed along - presumably angle-bracket text was lost from this copy; confirm against the upstream file. */ int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Matrix" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; /* Single precision, tolerance 1e-3, announced as row-major. */ { typedef float NumericT; NumericT epsilon = NumericT(1.0E-3); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << 
epsilon << std::endl; std::cout << " numeric: float" << std::endl; std::cout << " layout: row-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; /* Single precision, tolerance 1e-3, announced as column-major. */ { typedef float NumericT; NumericT epsilon = NumericT(1.0E-3); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; std::cout << " layout: column-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; /* The double-precision runs below (tolerance 1e-11) are unconditional unless VIENNACL_WITH_OPENCL is defined; in that case they only run when the current OpenCL device reports double support. */ #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-11; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; std::cout << " layout: row-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef double NumericT; NumericT epsilon = 1.0E-11; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; std::cout << " layout: column-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << 
std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/matrix_col_float.cpp000644 001750 001750 00000003342 12267307531 021753 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include "matrix_float_double.hpp" /* Entry point of the column-major, single-precision matrix test. Both parameters are unnamed, so command-line arguments are ignored. Prints a banner and the test setup, calls run_test(epsilon) with epsilon = 1e-4 and returns EXIT_FAILURE if that call does not return EXIT_SUCCESS, otherwise EXIT_SUCCESS. NOTE(review): epsilon is declared double although the run is labelled float, and run_test is called without template arguments in this text - presumably they were lost from this copy; confirm against matrix_float_double.hpp. */ int main (int, const char **) { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Matrix operations, column-major, single precision " << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; double epsilon = 1e-4; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; std::cout << " --- column-major ---" << std::endl; if (run_test(epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/vector_int.cpp000644 001750 001750 00000150303 12267307531 020601 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU 
Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // *** System // #include #include // // *** Boost // #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL #define VIENNACL_WITH_UBLAS 1 #include "viennacl/vector.hpp" #include "viennacl/vector_proxy.hpp" #include "viennacl/linalg/inner_prod.hpp" #include "viennacl/linalg/norm_1.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/norm_inf.hpp" #include "Random.hpp" using namespace boost::numeric; // // ------------------------------------------------------------- // /* diff, scalar overloads (the next three definitions): each calls viennacl::backend::finish() and then returns s1 - s2. The second operand is a plain ScalarType, a viennacl::scalar, or a viennacl::entry_proxy respectively. NOTE(review): the template parameter lists after "template", and the template arguments of viennacl::scalar and viennacl::entry_proxy, are absent in this text - looks like angle-bracket content was stripped from this copy; restore from the upstream file before compiling. */ template ScalarType diff(ScalarType const & s1, ScalarType const & s2) { viennacl::backend::finish(); return s1 - s2; } // // ------------------------------------------------------------- // template ScalarType diff(ScalarType const & s1, viennacl::scalar const & s2) { viennacl::backend::finish(); return s1 - s2; } // // ------------------------------------------------------------- // template ScalarType diff(ScalarType const & s1, viennacl::entry_proxy const & s2) { viennacl::backend::finish(); return s1 - s2; } // // ------------------------------------------------------------- // /* diff, vector overload: allocates a ublas vector v2_cpu of v2.size() entries, calls viennacl::backend::finish(), copies v2 into v2_cpu and then starts a loop over i. NOTE(review): the text stops right after the loop variable - the loop body, the return statement and the closing brace of this function are not present in this copy. */ template ScalarType diff(ublas::vector const & v1, VCLVectorType const & v2) { ublas::vector v2_cpu(v2.size()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin()); for (unsigned int i=0;i /* check: returns EXIT_SUCCESS when diff(t1, t2) equals 0; otherwise prints the absolute difference and returns EXIT_FAILURE. The comparison is exact, no tolerance is applied. NOTE(review): T1 and T2 are used without a visible template header - presumably lost together with the end of the function above; confirm against upstream. */ int check(T1 const & t1, T2 const & t2) { int retval = EXIT_SUCCESS; if (diff(t1, t2) != 0) { std::cout << "# Error! 
Difference: " << std::abs(diff(t1, t2)) << std::endl; retval = EXIT_FAILURE; } return retval; } // // ------------------------------------------------------------- // template< typename NumericT, typename UblasVectorType, typename ViennaCLVectorType1, typename ViennaCLVectorType2 > int test(UblasVectorType & ublas_v1, UblasVectorType & ublas_v2, ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2) { int retval = EXIT_SUCCESS; NumericT cpu_result = 42; viennacl::scalar gpu_result = 43; // // Initializer: // std::cout << "Checking for zero_vector initializer..." << std::endl; //ublas_v1 = ublas::zero_vector(ublas_v1.size()); for (std::size_t i=0; i(vcl_v1.size()); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Checking for scalar_vector initializer..." << std::endl; //ublas_v1 = ublas::scalar_vector(ublas_v1.size(), cpu_result); for (std::size_t i=0; i(vcl_v1.size(), cpu_result); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; //ublas_v1 = ublas::scalar_vector(ublas_v1.size(), gpu_result); for (std::size_t i=0; i(vcl_v1.size(), gpu_result); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Checking for unit_vector initializer..." 
<< std::endl; //ublas_v1 = ublas::unit_vector(ublas_v1.size(), 5); for (std::size_t i=0; i(vcl_v1.size(), 5); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; for (std::size_t i=0; i(cpu_index), static_cast(gpu_index)) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- cpu_result = ublas_v1[index_norm_inf(ublas_v1)]; gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1)]; if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_result = ublas_v1[index_norm_inf(ublas_v1 + ublas_v2)]; gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1 + vcl_v2)]; if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; // // Plane rotation and assignments // // -------------------------------------------------------------------------- ublas::vector x = ublas_v1; ublas::vector y = ublas_v2; ublas::vector t = ublas_v1; t.assign ( NumericT(1) * x + NumericT(2) * y), y.assign (- NumericT(2) * x + NumericT(1) * y), x.assign (t); viennacl::linalg::plane_rotation(vcl_v1, vcl_v2, NumericT(1), NumericT(2)); if (check(x, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(y, vcl_v2) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing assignments..." << std::endl; NumericT val = static_cast(1); for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = val; for (size_t i=0; i < vcl_v1.size(); ++i) vcl_v1(i) = val; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // multiplication and division of vectors by scalars // std::cout << "Testing scaling with CPU scalar..." << std::endl; NumericT alpha = static_cast(3); viennacl::scalar gpu_alpha = alpha; ublas_v1 *= alpha; vcl_v1 *= alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing scaling with GPU scalar..." 
<< std::endl; ublas_v1 *= alpha; vcl_v1 *= gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; NumericT beta = static_cast(2); viennacl::scalar gpu_beta = beta; std::cout << "Testing shrinking with CPU scalar..." << std::endl; ublas_v1 /= beta; vcl_v1 /= beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing shrinking with GPU scalar..." << std::endl; ublas_v1 /= beta; vcl_v1 /= gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // add and inplace_add of vectors // for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); //resync viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing add on vector..." << std::endl; std::cout << "Checking for successful copy..." << std::endl; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_v2, vcl_v2) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 = ublas_v1 + ublas_v2; vcl_v1 = vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing add on vector with flipsign..." << std::endl; ublas_v1 = - ublas_v1 + ublas_v2; vcl_v1 = - vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace-add on vector..." << std::endl; ublas_v1 += ublas_v2; vcl_v1 += vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // subtract and inplace_subtract of vectors // std::cout << "Testing sub on vector..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - ublas_v2; vcl_v1 = vcl_v1 - vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace-sub on vector..." 
<< std::endl; ublas_v1 -= ublas_v2; vcl_v1 -= vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // multiply-add // std::cout << "Testing multiply-add on vector with CPU scalar (right)..." << std::endl; for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + alpha * ublas_v2; vcl_v1 = vcl_v1 + alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with CPU scalar (left)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 + ublas_v2; vcl_v1 = alpha * vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with CPU scalar (both)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 + beta * ublas_v2; vcl_v1 = alpha * vcl_v1 + beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with CPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v2; vcl_v1 += alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (right)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + alpha * ublas_v2; vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (left)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + alpha * ublas_v2; vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (both)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 + beta * ublas_v2; vcl_v1 = gpu_alpha * vcl_v1 + gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v1 + beta * ublas_v2; vcl_v1 += gpu_alpha * vcl_v1 + gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, subtracting)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v1 - beta * ublas_v2; vcl_v1 += gpu_alpha * vcl_v1 - gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with GPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v2; vcl_v1 += gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // division-add // std::cout << "Testing division-add on vector with CPU scalar (right)..." << std::endl; for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + ublas_v2 / alpha; vcl_v1 = vcl_v1 + vcl_v2 / alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with CPU scalar (left)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha + ublas_v2; vcl_v1 = vcl_v1 / alpha + vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with CPU scalar (both)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha + ublas_v2 / beta; vcl_v1 = vcl_v1 / alpha + vcl_v2 / beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-multiply-add on vector with CPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha + ublas_v2 * beta; vcl_v1 = vcl_v1 / alpha + vcl_v2 * beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-division-add on vector with CPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 * alpha + ublas_v2 / beta; vcl_v1 = vcl_v1 * alpha + vcl_v2 / beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with CPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v2 / alpha; vcl_v1 += vcl_v2 / alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with GPU scalar (right)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + ublas_v2 / alpha; vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with GPU scalar (left)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + ublas_v2 / alpha; vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with GPU scalar (both)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha + ublas_v2 / beta; vcl_v1 = vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with GPU scalar (both, adding)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v1 / alpha + ublas_v2 / beta; vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with GPU scalar (both, subtracting)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v1 / alpha - ublas_v2 / beta; vcl_v1 += vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-multiply-add on vector with GPU scalar (adding)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v1 / alpha + ublas_v2 * beta; vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-division-add on vector with GPU scalar (subtracting)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v1 * alpha - ublas_v2 / beta; vcl_v1 += vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with GPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v2 * alpha; vcl_v1 += vcl_v2 * gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // multiply-subtract // std::cout << "Testing multiply-subtract on vector with CPU scalar (right)..." << std::endl; for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - alpha * ublas_v2; vcl_v1 = vcl_v1 - alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-subtract on vector with CPU scalar (left)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 - ublas_v2; vcl_v1 = alpha * vcl_v1 - vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-subtract on vector with CPU scalar (both)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 - beta * ublas_v2; vcl_v1 = alpha * vcl_v1 - beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-subtract on vector with CPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= alpha * ublas_v2; vcl_v1 -= alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-subtract on vector with GPU scalar (right)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - alpha * ublas_v2; vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-subtract on vector with GPU scalar (left)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - alpha * ublas_v2; vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-subtract on vector with GPU scalar (both)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 - beta * ublas_v2; vcl_v1 = gpu_alpha * vcl_v1 - gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-subtract on vector with GPU scalar (both, adding)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= alpha * ublas_v1 + beta * ublas_v2; vcl_v1 -= gpu_alpha * vcl_v1 + gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-subtract on vector with GPU scalar (both, subtracting)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= alpha * ublas_v1 - beta * ublas_v2; vcl_v1 -= gpu_alpha * vcl_v1 - gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-subtract on vector with GPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= alpha * ublas_v2; vcl_v1 -= gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // division-subtract // std::cout << "Testing division-subtract on vector with CPU scalar (right)..." 
<< std::endl; for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - ublas_v2 / alpha; vcl_v1 = vcl_v1 - vcl_v2 / alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-subtract on vector with CPU scalar (left)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha - ublas_v2; vcl_v1 = vcl_v1 / alpha - vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-subtract on vector with CPU scalar (both)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha - ublas_v2 / alpha; vcl_v1 = vcl_v1 / alpha - vcl_v2 / alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-subtract on vector with CPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v2 / alpha; vcl_v1 -= vcl_v2 / alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-subtract on vector with GPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v2 / alpha; vcl_v1 -= vcl_v2 / gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-subtract on vector with GPU scalar (right)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - ublas_v2 / alpha; vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-subtract on vector with GPU scalar (left)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - ublas_v2 / alpha; vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-subtract on vector with GPU scalar (both)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha - ublas_v2 / beta; vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-subtract on vector with GPU scalar (both, adding)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v1 / alpha + ublas_v2 / beta; vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-subtract on vector with GPU scalar (both, subtracting)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v1 / alpha - ublas_v2 / beta; vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-division-subtract on vector with GPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 * alpha - ublas_v2 / beta; vcl_v1 = vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-multiply-subtract on vector with GPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha - ublas_v2 * beta; vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-division-subtract on vector with GPU scalar (adding)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v1 * alpha + ublas_v2 / beta; vcl_v1 -= vcl_v1 * gpu_alpha + vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-multiply-subtract on vector with GPU scalar (adding)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v1 / alpha + ublas_v2 * beta; vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-division-subtract on vector with GPU scalar (subtracting)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v1 * alpha - ublas_v2 / beta; vcl_v1 -= vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-multiply-subtract on vector with GPU scalar (subtracting)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v1 / alpha - ublas_v2 * beta; vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-subtract on vector with GPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= alpha * ublas_v2; vcl_v1 -= gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // More complicated expressions (for ensuring the operator overloads work correctly) // for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing three vector additions..." 
<< std::endl; ublas_v1 = ublas_v2 + ublas_v1 + ublas_v2; vcl_v1 = vcl_v2 + vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing complicated vector expression with CPU scalar..." << std::endl; ublas_v1 = beta * (ublas_v1 - alpha * ublas_v2); vcl_v1 = beta * (vcl_v1 - alpha * vcl_v2); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing complicated vector expression with GPU scalar..." << std::endl; ublas_v1 = beta * (ublas_v1 - alpha * ublas_v2); vcl_v1 = gpu_beta * (vcl_v1 - gpu_alpha * vcl_v2); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing swap..." 
<< std::endl; swap(ublas_v1, ublas_v2); swap(vcl_v1, vcl_v2); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- for (std::size_t i=0; i int test() { int retval = EXIT_SUCCESS; std::size_t size = 12345; std::cout << "Running tests for vector of size " << size << std::endl; // // Set up UBLAS objects // ublas::vector ublas_full_vec(size); ublas::vector ublas_full_vec2(ublas_full_vec.size()); for (std::size_t i=0; i > ublas_range_vec(ublas_full_vec, r1); ublas::vector_range< ublas::vector > ublas_range_vec2(ublas_full_vec2, r2); ublas::slice s1( ublas_full_vec.size() / 4, 3, ublas_full_vec.size() / 4); ublas::slice s2(2 * ublas_full_vec2.size() / 4, 2, ublas_full_vec2.size() / 4); ublas::vector_slice< ublas::vector > ublas_slice_vec(ublas_full_vec, s1); ublas::vector_slice< ublas::vector > ublas_slice_vec2(ublas_full_vec2, s2); // // Set up ViennaCL objects // viennacl::vector vcl_full_vec(ublas_full_vec.size()); viennacl::vector vcl_full_vec2(ublas_full_vec2.size()); viennacl::fast_copy(ublas_full_vec.begin(), ublas_full_vec.end(), vcl_full_vec.begin()); viennacl::copy(ublas_full_vec2.begin(), ublas_full_vec2.end(), vcl_full_vec2.begin()); viennacl::range vcl_r1( vcl_full_vec.size() / 4, 2 * vcl_full_vec.size() / 4); viennacl::range vcl_r2(2 * vcl_full_vec2.size() / 4, 3 * vcl_full_vec2.size() / 4); viennacl::vector_range< viennacl::vector > vcl_range_vec(vcl_full_vec, vcl_r1); viennacl::vector_range< viennacl::vector > vcl_range_vec2(vcl_full_vec2, vcl_r2); { viennacl::vector vcl_short_vec(vcl_range_vec); viennacl::vector vcl_short_vec2 = vcl_range_vec2; ublas::vector ublas_short_vec(ublas_range_vec); ublas::vector ublas_short_vec2(ublas_range_vec2); std::cout << "Testing creation of vectors from range..." 
<< std::endl; if (check(ublas_short_vec, vcl_short_vec) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_short_vec2, vcl_short_vec2) != EXIT_SUCCESS) return EXIT_FAILURE; } viennacl::slice vcl_s1( vcl_full_vec.size() / 4, 3, vcl_full_vec.size() / 4); viennacl::slice vcl_s2(2 * vcl_full_vec2.size() / 4, 2, vcl_full_vec2.size() / 4); viennacl::vector_slice< viennacl::vector > vcl_slice_vec(vcl_full_vec, vcl_s1); viennacl::vector_slice< viennacl::vector > vcl_slice_vec2(vcl_full_vec2, vcl_s2); viennacl::vector vcl_short_vec(vcl_slice_vec); viennacl::vector vcl_short_vec2 = vcl_slice_vec2; ublas::vector ublas_short_vec(ublas_slice_vec); ublas::vector ublas_short_vec2(ublas_slice_vec2); std::cout << "Testing creation of vectors from slice..." << std::endl; if (check(ublas_short_vec, vcl_short_vec) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_short_vec2, vcl_short_vec2) != EXIT_SUCCESS) return EXIT_FAILURE; // // Now start running tests for vectors, ranges and slices: // std::cout << " ** vcl_v1 = vector, vcl_v2 = vector **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_short_vec, vcl_short_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = vector, vcl_v2 = range **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_short_vec, vcl_range_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = vector, vcl_v2 = slice **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_short_vec, vcl_slice_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " ** vcl_v1 = range, vcl_v2 = vector **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_range_vec, vcl_short_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = range, vcl_v2 = range **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_range_vec, vcl_range_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; 
std::cout << " ** vcl_v1 = range, vcl_v2 = slice **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_range_vec, vcl_slice_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " ** vcl_v1 = slice, vcl_v2 = vector **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_slice_vec, vcl_short_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = slice, vcl_v2 = range **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_slice_vec, vcl_range_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = slice, vcl_v2 = slice **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_slice_vec, vcl_slice_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; return EXIT_SUCCESS; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Vector with Integer types" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: int" << std::endl; retval = test(); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: long" << std::endl; retval = test(); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; 
} std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/blas3_prod_float.cu000644 001750 001750 00000004165 12267307531 021473 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include "blas3_prod_float_double.hpp" // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: BLAS 3 routines" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = NumericT(1.0E-3); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; 
std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/matrix_row_double.cu000644 001750 001750 00000003440 12267307531 021776 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include "matrix_float_double.hpp" int main (int, const char **) { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Matrix operations, row-major, double precision " << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { double epsilon = 1e-12; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; if (run_test(epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/vector_uint.cu000644 001750 001750 00000100320 12267307531 020605 
0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // *** System // #include #include // // *** Boost // #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL #define VIENNACL_WITH_UBLAS 1 #include "viennacl/vector.hpp" #include "viennacl/vector_proxy.hpp" #include "viennacl/linalg/inner_prod.hpp" #include "viennacl/linalg/norm_1.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/norm_inf.hpp" #include "Random.hpp" using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType const & s1, ScalarType const & s2) { viennacl::backend::finish(); return s1 - s2; } // // ------------------------------------------------------------- // template ScalarType diff(ScalarType const & s1, viennacl::scalar const & s2) { viennacl::backend::finish(); return s1 - s2; } // // ------------------------------------------------------------- // template ScalarType diff(ScalarType const & s1, viennacl::entry_proxy const & s2) { viennacl::backend::finish(); return s1 - s2; } // // ------------------------------------------------------------- // template ScalarType diff(ublas::vector const & v1, VCLVectorType const & v2) { ublas::vector v2_cpu(v2.size()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin()); for 
(unsigned int i=0;i int check(T1 const & t1, T2 const & t2) { int retval = EXIT_SUCCESS; if (diff(t1, t2) != 0) { std::cout << "# Error! Difference: " << diff(t1, t2) << std::endl; retval = EXIT_FAILURE; } return retval; } // // ------------------------------------------------------------- // template< typename NumericT, typename UblasVectorType, typename ViennaCLVectorType1, typename ViennaCLVectorType2 > int test(UblasVectorType & ublas_v1, UblasVectorType & ublas_v2, ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2) { int retval = EXIT_SUCCESS; NumericT cpu_result = 42; viennacl::scalar gpu_result = 43; // // Initializer: // std::cout << "Checking for zero_vector initializer..." << std::endl; //ublas_v1 = ublas::zero_vector(ublas_v1.size()); for (std::size_t i=0; i(vcl_v1.size()); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Checking for scalar_vector initializer..." << std::endl; //ublas_v1 = ublas::scalar_vector(ublas_v1.size(), cpu_result); for (std::size_t i=0; i(vcl_v1.size(), cpu_result); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; //ublas_v1 = ublas::scalar_vector(ublas_v1.size(), gpu_result); for (std::size_t i=0; i(vcl_v1.size(), gpu_result); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Checking for unit_vector initializer..." 
<< std::endl; //ublas_v1 = ublas::unit_vector(ublas_v1.size(), 5); for (std::size_t i=0; i(vcl_v1.size(), 5); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; for (std::size_t i=0; i cpu_result) cpu_result = ublas_v1[i]; gpu_result = viennacl::linalg::norm_inf(vcl_v1); if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_result2 = 0; for (std::size_t i=0; i cpu_result2) cpu_result2 = ublas_v1[i]; cpu_result = viennacl::linalg::norm_inf(vcl_v1); if (check(cpu_result, cpu_result2) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_result2 = 0; for (std::size_t i=0; i cpu_result2) cpu_result2 = ublas_v1[i] + ublas_v2[i]; cpu_result = viennacl::linalg::norm_inf(vcl_v1 + vcl_v2); if (check(cpu_result, cpu_result2) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing index_norm_inf..." << std::endl; std::size_t cpu_index = 0; cpu_result = 0; for (std::size_t i=0; i cpu_result) { cpu_result = ublas_v1[i]; cpu_index = i; } std::size_t gpu_index = viennacl::linalg::index_norm_inf(vcl_v1); if (check(static_cast(cpu_index), static_cast(gpu_index)) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1)]; if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_index = 0; cpu_result = 0; for (std::size_t i=0; i cpu_result) { cpu_result = ublas_v1[i]; cpu_index = i; } gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1 + vcl_v2)]; if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing assignments..." 
<< std::endl; NumericT val = static_cast(1); for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = val; for (size_t i=0; i < vcl_v1.size(); ++i) vcl_v1(i) = val; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // multiplication and division of vectors by scalars // std::cout << "Testing scaling with CPU scalar..." << std::endl; NumericT alpha = static_cast(3); viennacl::scalar gpu_alpha = alpha; ublas_v1 *= alpha; vcl_v1 *= alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing scaling with GPU scalar..." << std::endl; ublas_v1 *= alpha; vcl_v1 *= gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; NumericT beta = static_cast(2); viennacl::scalar gpu_beta = beta; std::cout << "Testing shrinking with CPU scalar..." << std::endl; ublas_v1 /= beta; vcl_v1 /= beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing shrinking with GPU scalar..." << std::endl; ublas_v1 /= beta; vcl_v1 /= gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // add and inplace_add of vectors // for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); //resync viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing add on vector..." << std::endl; std::cout << "Checking for successful copy..." << std::endl; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_v2, vcl_v2) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 = ublas_v1 + ublas_v2; vcl_v1 = vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace-add on vector..." 
<< std::endl; ublas_v1 += ublas_v2; vcl_v1 += vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // multiply-add // std::cout << "Testing multiply-add on vector with CPU scalar (right)..." << std::endl; for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + alpha * ublas_v2; vcl_v1 = vcl_v1 + alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with CPU scalar (left)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 + ublas_v2; vcl_v1 = alpha * vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with CPU scalar (both)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 + beta * ublas_v2; vcl_v1 = alpha * vcl_v1 + beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with CPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v2; vcl_v1 += alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (right)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + alpha * ublas_v2; vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (left)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + alpha * ublas_v2; vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (both)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 + beta * ublas_v2; vcl_v1 = gpu_alpha * vcl_v1 + gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v1 + beta * ublas_v2; vcl_v1 += gpu_alpha * vcl_v1 + gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with GPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v2; vcl_v1 += gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // division-add // std::cout << "Testing division-add on vector with CPU scalar (right)..." 
<< std::endl; for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + ublas_v2 / alpha; vcl_v1 = vcl_v1 + vcl_v2 / alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with CPU scalar (left)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha + ublas_v2; vcl_v1 = vcl_v1 / alpha + vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with CPU scalar (both)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha + ublas_v2 / beta; vcl_v1 = vcl_v1 / alpha + vcl_v2 / beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-multiply-add on vector with CPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha + ublas_v2 * beta; vcl_v1 = vcl_v1 / alpha + vcl_v2 * beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-division-add on vector with CPU scalar..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 * alpha + ublas_v2 / beta; vcl_v1 = vcl_v1 * alpha + vcl_v2 / beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with CPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v2 / alpha; vcl_v1 += vcl_v2 / alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with GPU scalar (right)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + ublas_v2 / alpha; vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with GPU scalar (left)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + ublas_v2 / alpha; vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with GPU scalar (both)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha + ublas_v2 / beta; vcl_v1 = vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with GPU scalar (both, adding)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v1 / alpha + ublas_v2 / beta; vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-multiply-add on vector with GPU scalar (adding)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v1 / alpha + ublas_v2 * beta; vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with GPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v2 * alpha; vcl_v1 += vcl_v2 * gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // More complicated expressions (for ensuring the operator overloads work correctly) // for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing three vector additions..." << std::endl; ublas_v1 = ublas_v2 + ublas_v1 + ublas_v2; vcl_v1 = vcl_v2 + vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing swap..." 
<< std::endl; swap(ublas_v1, ublas_v2); swap(vcl_v1, vcl_v2); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing elementwise multiplication..." << std::endl; std::cout << " v1 = element_prod(v1, v2);" << std::endl; ublas_v1 = ublas::element_prod(ublas_v1, ublas_v2); vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " v1 += element_prod(v1, v2);" << std::endl; ublas_v1 += ublas::element_prod(ublas_v1, ublas_v2); vcl_v1 += viennacl::linalg::element_prod(vcl_v1, vcl_v2); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " v1 = element_prod(v1 + v2, v2);" << std::endl; ublas_v1 = ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2); vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " v1 += element_prod(v1 + v2, v2);" << std::endl; ublas_v1 += ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2); vcl_v1 += viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " v1 = element_prod(v1, v2 + v1);" << std::endl; ublas_v1 = ublas::element_prod(ublas_v1, ublas_v2 + ublas_v1); vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " v1 += element_prod(v1, v2 + v1);" << std::endl; ublas_v1 += ublas::element_prod(ublas_v1, ublas_v2 + ublas_v1); vcl_v1 += viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " v1 = element_prod(v1 + v2, v2 + v1);" << std::endl; ublas_v1 = ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2 + ublas_v1); vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return 
EXIT_FAILURE; std::cout << " v1 += element_prod(v1 + v2, v2 + v1);" << std::endl; ublas_v1 += ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2 + ublas_v1); vcl_v1 += viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing elementwise division..." << std::endl; for (std::size_t i=0; i int test() { int retval = EXIT_SUCCESS; std::size_t size = 12345; std::cout << "Running tests for vector of size " << size << std::endl; // // Set up UBLAS objects // ublas::vector ublas_full_vec(size); ublas::vector ublas_full_vec2(ublas_full_vec.size()); for (std::size_t i=0; i > ublas_range_vec(ublas_full_vec, r1); ublas::vector_range< ublas::vector > ublas_range_vec2(ublas_full_vec2, r2); ublas::slice s1( ublas_full_vec.size() / 4, 3, ublas_full_vec.size() / 4); ublas::slice s2(2 * ublas_full_vec2.size() / 4, 2, ublas_full_vec2.size() / 4); ublas::vector_slice< ublas::vector > ublas_slice_vec(ublas_full_vec, s1); ublas::vector_slice< ublas::vector > ublas_slice_vec2(ublas_full_vec2, s2); // // Set up ViennaCL objects // viennacl::vector vcl_full_vec(ublas_full_vec.size()); viennacl::vector vcl_full_vec2(ublas_full_vec2.size()); viennacl::fast_copy(ublas_full_vec.begin(), ublas_full_vec.end(), vcl_full_vec.begin()); viennacl::copy(ublas_full_vec2.begin(), ublas_full_vec2.end(), vcl_full_vec2.begin()); viennacl::range vcl_r1( vcl_full_vec.size() / 4, 2 * vcl_full_vec.size() / 4); viennacl::range vcl_r2(2 * vcl_full_vec2.size() / 4, 3 * vcl_full_vec2.size() / 4); viennacl::vector_range< viennacl::vector > vcl_range_vec(vcl_full_vec, vcl_r1); viennacl::vector_range< viennacl::vector > vcl_range_vec2(vcl_full_vec2, vcl_r2); { viennacl::vector vcl_short_vec(vcl_range_vec); viennacl::vector vcl_short_vec2 = vcl_range_vec2; ublas::vector ublas_short_vec(ublas_range_vec); ublas::vector ublas_short_vec2(ublas_range_vec2); std::cout << "Testing creation of vectors from range..." 
<< std::endl; if (check(ublas_short_vec, vcl_short_vec) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_short_vec2, vcl_short_vec2) != EXIT_SUCCESS) return EXIT_FAILURE; } viennacl::slice vcl_s1( vcl_full_vec.size() / 4, 3, vcl_full_vec.size() / 4); viennacl::slice vcl_s2(2 * vcl_full_vec2.size() / 4, 2, vcl_full_vec2.size() / 4); viennacl::vector_slice< viennacl::vector > vcl_slice_vec(vcl_full_vec, vcl_s1); viennacl::vector_slice< viennacl::vector > vcl_slice_vec2(vcl_full_vec2, vcl_s2); viennacl::vector vcl_short_vec(vcl_slice_vec); viennacl::vector vcl_short_vec2 = vcl_slice_vec2; ublas::vector ublas_short_vec(ublas_slice_vec); ublas::vector ublas_short_vec2(ublas_slice_vec2); std::cout << "Testing creation of vectors from slice..." << std::endl; if (check(ublas_short_vec, vcl_short_vec) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_short_vec2, vcl_short_vec2) != EXIT_SUCCESS) return EXIT_FAILURE; // // Now start running tests for vectors, ranges and slices: // std::cout << " ** vcl_v1 = vector, vcl_v2 = vector **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_short_vec, vcl_short_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = vector, vcl_v2 = range **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_short_vec, vcl_range_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = vector, vcl_v2 = slice **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_short_vec, vcl_slice_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " ** vcl_v1 = range, vcl_v2 = vector **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_range_vec, vcl_short_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = range, vcl_v2 = range **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_range_vec, vcl_range_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; 
std::cout << " ** vcl_v1 = range, vcl_v2 = slice **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_range_vec, vcl_slice_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " ** vcl_v1 = slice, vcl_v2 = vector **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_slice_vec, vcl_short_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = slice, vcl_v2 = range **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_slice_vec, vcl_range_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = slice, vcl_v2 = slice **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_slice_vec, vcl_slice_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; return EXIT_SUCCESS; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Vector with Integer types" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: unsigned int" << std::endl; retval = test(); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: long" << std::endl; retval = test(); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else 
return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/matrix_col_int.cpp000644 001750 001750 00000003544 12267307531 021444 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include "matrix_int.hpp" int main (int, const char **) { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Matrix operations, column-major, integers " << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: int" << std::endl; std::cout << " --- column-major ---" << std::endl; if (run_test() != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: long" << std::endl; std::cout << " --- column-major ---" << std::endl; if (run_test() != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return EXIT_SUCCESS; } 
ViennaCL-1.5.1-src/tests/src/libviennacl_blas2.cu000644 001750 001750 00000034265 12267307531 021633 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /* * * Testing the ViennaCL BLAS-like shared library * */ // include necessary system headers #include #include // Some helper functions for this tutorial: #include "viennacl.hpp" #include "viennacl/vector.hpp" template ScalarType diff(ScalarType const & s1, ScalarType const & s2) { if (s1 != s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } template ScalarType diff(std::vector const & v1, ViennaCLVectorType const & vcl_vec) { std::vector v2_cpu(vcl_vec.size()); viennacl::backend::finish(); viennacl::copy(vcl_vec, v2_cpu); ScalarType inf_norm = 0; for (unsigned int i=0;i 0 ) v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); else v2_cpu[i] = 0.0; if (v2_cpu[i] > inf_norm) inf_norm = v2_cpu[i]; } return inf_norm; } template void check(T const & t, U const & u, EpsilonT eps) { EpsilonT rel_error = diff(t,u); if (rel_error > eps) { std::cerr << "Relative error: " << rel_error << std::endl; std::cerr << "Aborting!" 
<< std::endl; exit(EXIT_FAILURE); } std::cout << "SUCCESS "; } int main() { std::size_t size1 = 13; // at least 7 std::size_t size2 = 11; // at least 7 float eps_float = 1e-5f; double eps_double = 1e-12; ViennaCLBackend my_backend; ViennaCLBackendCreate(&my_backend); std::vector ref_float_x(size1); for (std::size_t i=0; i(i); std::vector ref_float_y(size2); for (std::size_t i=0; i(size2 - i); std::vector ref_float_A(size1*size2); for (std::size_t i=0; i(3*i); std::vector ref_float_B(size1*size2); for (std::size_t i=0; i(2*i); std::vector ref_double_x(size1, 1.0); for (std::size_t i=0; i(i); std::vector ref_double_y(size2, 2.0); for (std::size_t i=0; i(size2 - i); std::vector ref_double_A(size1*size2, 3.0); for (std::size_t i=0; i(3*i); std::vector ref_double_B(size1*size2, 4.0); for (std::size_t i=0; i(2*i); // Host setup viennacl::vector host_float_x = viennacl::scalar_vector(size1, 1.0f, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i host_float_y = viennacl::scalar_vector(size2, 2.0f, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i host_float_A = viennacl::scalar_vector(size1*size2, 3.0f, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i host_float_B = viennacl::scalar_vector(size1*size2, 4.0f, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i host_double_x = viennacl::scalar_vector(size1, 1.0, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i host_double_y = viennacl::scalar_vector(size2, 2.0, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i host_double_A = viennacl::scalar_vector(size1*size2, 3.0, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i host_double_B = viennacl::scalar_vector(size1*size2, 4.0, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i cuda_float_x = viennacl::scalar_vector(size1, 1.0f, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i cuda_float_y = viennacl::scalar_vector(size2, 2.0f, 
viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i cuda_float_A = viennacl::scalar_vector(size1*size2, 3.0f, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i cuda_float_B = viennacl::scalar_vector(size1*size2, 4.0f, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i cuda_double_x = viennacl::scalar_vector(size1, 1.0, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i cuda_double_y = viennacl::scalar_vector(size2, 2.0, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i cuda_double_A = viennacl::scalar_vector(size1*size2, 3.0, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i cuda_double_B = viennacl::scalar_vector(size1*size2, 4.0, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i opencl_float_x = viennacl::scalar_vector(size1, 1.0f, viennacl::context(viennacl::ocl::get_context(context_id))); for (std::size_t i=0; i opencl_float_y = viennacl::scalar_vector(size2, 2.0f, viennacl::context(viennacl::ocl::get_context(context_id))); for (std::size_t i=0; i opencl_float_A = viennacl::scalar_vector(size1*size2, 3.0f, viennacl::context(viennacl::ocl::get_context(context_id))); for (std::size_t i=0; i opencl_float_B = viennacl::scalar_vector(size1*size2, 4.0f, viennacl::context(viennacl::ocl::get_context(context_id))); for (std::size_t i=0; i *opencl_double_x = NULL; viennacl::vector *opencl_double_y = NULL; viennacl::vector *opencl_double_A = NULL; viennacl::vector *opencl_double_B = NULL; if( viennacl::ocl::current_device().double_support() ) { opencl_double_x = new viennacl::vector(viennacl::scalar_vector(size1, 1.0, viennacl::context(viennacl::ocl::get_context(context_id)))); for (std::size_t i=0; i(viennacl::scalar_vector(size2, 2.0, viennacl::context(viennacl::ocl::get_context(context_id)))); for (std::size_t i=0; i(viennacl::scalar_vector(size1*size2, 3.0, viennacl::context(viennacl::ocl::get_context(context_id)))); for (std::size_t i=0; 
i(viennacl::scalar_vector(size1*size2, 4.0, viennacl::context(viennacl::ocl::get_context(context_id)))); for (std::size_t i=0; i(host_float_A), 2, 1, 2, 3, ViennaCLInt(size2), viennacl::linalg::host_based::detail::extract_raw_pointer(host_float_y), 1, 3, 0.1234f, viennacl::linalg::host_based::detail::extract_raw_pointer(host_float_x), 1, 2); check(ref_float_x, host_float_x, eps_float); ViennaCLHostDgemv(my_backend, ViennaCLRowMajor, ViennaCLNoTrans, ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_A), 2, 1, 2, 3, ViennaCLInt(size2), viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_y), 1, 3, 0.1234, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_x), 1, 2); check(ref_double_x, host_double_x, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASgemv(my_backend, ViennaCLRowMajor, ViennaCLNoTrans, ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415f, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_A), 2, 1, 2, 3, size2, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_y), 1, 3, 0.1234f, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_x), 1, 2); check(ref_float_x, cuda_float_x, eps_float); ViennaCLCUDADgemv(my_backend, ViennaCLRowMajor, ViennaCLNoTrans, ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_A), 2, 1, 2, 3, size2, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_y), 1, 3, 0.1234, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_x), 1, 2); check(ref_double_x, cuda_double_x, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSgemv(my_backend, ViennaCLRowMajor, ViennaCLNoTrans, ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415f, viennacl::traits::opencl_handle(opencl_float_A), 2, 1, 2, 3, ViennaCLInt(size2), viennacl::traits::opencl_handle(opencl_float_y), 1, 3, 0.1234f, 
viennacl::traits::opencl_handle(opencl_float_x), 1, 2); check(ref_float_x, opencl_float_x, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDgemv(my_backend, ViennaCLRowMajor, ViennaCLNoTrans, ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415, viennacl::traits::opencl_handle(*opencl_double_A), 2, 1, 2, 3, ViennaCLInt(size2), viennacl::traits::opencl_handle(*opencl_double_y), 1, 3, 0.1234, viennacl::traits::opencl_handle(*opencl_double_x), 1, 2); check(ref_double_x, *opencl_double_x, eps_double); } #endif #ifdef VIENNACL_WITH_OPENCL delete opencl_double_x; delete opencl_double_y; delete opencl_double_A; delete opencl_double_B; #endif ViennaCLBackendDestroy(&my_backend); // // That's it. // std::cout << std::endl << "!!!! TEST COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/libviennacl_blas1.cpp000644 001750 001750 00000065224 12267307531 022004 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /* * * Testing the ViennaCL BLAS-like shared library * */ // include necessary system headers #include #include #include // Some helper functions for this tutorial: #include "viennacl.hpp" #include "viennacl/vector.hpp" template ScalarType diff(ScalarType const & s1, ScalarType const & s2) { if (s1 != s2) return (s1 - s2) / std::max(static_cast(std::fabs(static_cast(s1))), static_cast(std::fabs(static_cast(s2)))); return ScalarType(0); } template ScalarType diff(std::vector const & v1, ViennaCLVectorType const & vcl_vec) { std::vector v2_cpu(vcl_vec.size()); viennacl::backend::finish(); viennacl::copy(vcl_vec, v2_cpu); ScalarType inf_norm = 0; for (unsigned int i=0;i 0 ) v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); else v2_cpu[i] = 0.0; if (v2_cpu[i] > inf_norm) inf_norm = v2_cpu[i]; } return inf_norm; } template void check(T const & t, U const & u, EpsilonT eps) { EpsilonT rel_error = static_cast(diff(t,u)); if (rel_error > eps) { std::cerr << "Relative error: " << rel_error << std::endl; std::cerr << "Aborting!" 
<< std::endl; exit(EXIT_FAILURE); } std::cout << "SUCCESS "; } int main() { std::size_t size = 10; // at least 7 float eps_float = 1e-5f; double eps_double = 1e-12; float ref_float_alpha; double ref_double_alpha; std::vector ref_float_x(size, 1.0f); std::vector ref_float_y(size, 2.0f); std::vector ref_double_x(size, 1.0); std::vector ref_double_y(size, 2.0); ViennaCLBackend my_backend; ViennaCLBackendCreate(&my_backend); // Host setup float host_float_alpha = 0; viennacl::vector host_float_x = viennacl::scalar_vector(size, 1.0f, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::vector host_float_y = viennacl::scalar_vector(size, 2.0f, viennacl::context(viennacl::MAIN_MEMORY)); double host_double_alpha = 0; viennacl::vector host_double_x = viennacl::scalar_vector(size, 1.0, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::vector host_double_y = viennacl::scalar_vector(size, 2.0, viennacl::context(viennacl::MAIN_MEMORY)); // CUDA setup #ifdef VIENNACL_WITH_CUDA float cuda_float_alpha = 0; viennacl::vector cuda_float_x = viennacl::scalar_vector(size, 1.0f, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::vector cuda_float_y = viennacl::scalar_vector(size, 2.0f, viennacl::context(viennacl::CUDA_MEMORY)); double cuda_double_alpha = 0; viennacl::vector cuda_double_x = viennacl::scalar_vector(size, 1.0, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::vector cuda_double_y = viennacl::scalar_vector(size, 2.0, viennacl::context(viennacl::CUDA_MEMORY)); #endif // OpenCL setup #ifdef VIENNACL_WITH_OPENCL ViennaCLInt context_id = 0; float opencl_float_alpha = 0; viennacl::vector opencl_float_x = viennacl::scalar_vector(size, 1.0f, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::vector opencl_float_y = viennacl::scalar_vector(size, 2.0f, viennacl::context(viennacl::ocl::get_context(context_id))); double opencl_double_alpha = 0; viennacl::vector *opencl_double_x = NULL; viennacl::vector *opencl_double_y = NULL; if( 
viennacl::ocl::current_device().double_support() ) { opencl_double_x = new viennacl::vector(viennacl::scalar_vector(size, 1.0, viennacl::context(viennacl::ocl::get_context(context_id)))); opencl_double_y = new viennacl::vector(viennacl::scalar_vector(size, 2.0, viennacl::context(viennacl::ocl::get_context(context_id)))); } ViennaCLBackendSetOpenCLContextID(my_backend, context_id); #endif // consistency checks: check(ref_float_x, host_float_x, eps_float); check(ref_float_y, host_float_y, eps_float); check(ref_double_x, host_double_x, eps_double); check(ref_double_y, host_double_y, eps_double); #ifdef VIENNACL_WITH_CUDA check(ref_float_x, cuda_float_x, eps_float); check(ref_float_y, cuda_float_y, eps_float); check(ref_double_x, cuda_double_x, eps_double); check(ref_double_y, cuda_double_y, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL check(ref_float_x, opencl_float_x, eps_float); check(ref_float_y, opencl_float_y, eps_float); if( viennacl::ocl::current_device().double_support() ) { check(ref_double_x, *opencl_double_x, eps_double); check(ref_double_y, *opencl_double_y, eps_double); } #endif // ASUM std::cout << std::endl << "-- Testing xASUM..."; ref_float_alpha = 0; ref_double_alpha = 0; for (std::size_t i=0; i(host_float_x), 2, 3); check(ref_float_alpha, host_float_alpha, eps_float); ViennaCLHostDasum(my_backend, ViennaCLInt(size/4), &host_double_alpha, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_x), 2, 3); check(ref_double_alpha, host_double_alpha, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASasum(my_backend, ViennaCLInt(size/4), &cuda_float_alpha, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_x), 2, 3); check(ref_float_alpha, cuda_float_alpha, eps_float); ViennaCLCUDADasum(my_backend, ViennaCLInt(size/4), &cuda_double_alpha, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_x), 2, 3); check(ref_double_alpha, cuda_double_alpha, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout 
<< std::endl << "OpenCL: "; ViennaCLOpenCLSasum(my_backend, ViennaCLInt(size/4), &opencl_float_alpha, viennacl::traits::opencl_handle(opencl_float_x).get(), 2, 3); check(ref_float_alpha, opencl_float_alpha, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDasum(my_backend, ViennaCLInt(size/4), &opencl_double_alpha, viennacl::traits::opencl_handle(*opencl_double_x).get(), 2, 3); check(ref_double_alpha, opencl_double_alpha, eps_double); } #endif // AXPY std::cout << std::endl << "-- Testing xAXPY..."; for (std::size_t i=0; i(host_float_x), 0, 2, viennacl::linalg::host_based::detail::extract_raw_pointer(host_float_y), 1, 2); check(ref_float_x, host_float_x, eps_float); check(ref_float_y, host_float_y, eps_float); ViennaCLHostDaxpy(my_backend, ViennaCLInt(size/3), 2.0, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_x), 0, 2, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_y), 1, 2); check(ref_double_x, host_double_x, eps_double); check(ref_double_y, host_double_y, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASaxpy(my_backend, ViennaCLInt(size/3), 2.0f, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_x), 0, 2, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_y), 1, 2); check(ref_float_x, cuda_float_x, eps_float); check(ref_float_y, cuda_float_y, eps_float); ViennaCLCUDADaxpy(my_backend, ViennaCLInt(size/3), 2.0, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_x), 0, 2, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_y), 1, 2); check(ref_double_x, cuda_double_x, eps_double); check(ref_double_y, cuda_double_y, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSaxpy(my_backend, ViennaCLInt(size/3), 2.0f, viennacl::traits::opencl_handle(opencl_float_x).get(), 0, 2, viennacl::traits::opencl_handle(opencl_float_y).get(), 1, 2); check(ref_float_x, opencl_float_x, eps_float); 
check(ref_float_y, opencl_float_y, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDaxpy(my_backend, ViennaCLInt(size/3), 2.0, viennacl::traits::opencl_handle(*opencl_double_x).get(), 0, 2, viennacl::traits::opencl_handle(*opencl_double_y).get(), 1, 2); check(ref_double_x, *opencl_double_x, eps_double); check(ref_double_y, *opencl_double_y, eps_double); } #endif // COPY std::cout << std::endl << "-- Testing xCOPY..."; for (std::size_t i=0; i(host_float_x), 1, 2, viennacl::linalg::host_based::detail::extract_raw_pointer(host_float_y), 0, 2); check(ref_float_x, host_float_x, eps_float); check(ref_float_y, host_float_y, eps_float); ViennaCLHostDcopy(my_backend, ViennaCLInt(size/3), viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_x), 1, 2, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_y), 0, 2); check(ref_double_x, host_double_x, eps_double); check(ref_double_y, host_double_y, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDAScopy(my_backend, ViennaCLInt(size/3), viennacl::linalg::cuda::detail::cuda_arg(cuda_float_x), 1, 2, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_y), 0, 2); check(ref_float_x, cuda_float_x, eps_float); check(ref_float_y, cuda_float_y, eps_float); ViennaCLCUDADcopy(my_backend, ViennaCLInt(size/3), viennacl::linalg::cuda::detail::cuda_arg(cuda_double_x), 1, 2, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_y), 0, 2); check(ref_double_x, cuda_double_x, eps_double); check(ref_double_y, cuda_double_y, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLScopy(my_backend, ViennaCLInt(size/3), viennacl::traits::opencl_handle(opencl_float_x).get(), 1, 2, viennacl::traits::opencl_handle(opencl_float_y).get(), 0, 2); check(ref_float_x, opencl_float_x, eps_float); check(ref_float_y, opencl_float_y, eps_float); if( viennacl::ocl::current_device().double_support() ) { 
ViennaCLOpenCLDcopy(my_backend, ViennaCLInt(size/3), viennacl::traits::opencl_handle(*opencl_double_x).get(), 1, 2, viennacl::traits::opencl_handle(*opencl_double_y).get(), 0, 2); check(ref_double_x, *opencl_double_x, eps_double); check(ref_double_y, *opencl_double_y, eps_double); } #endif // DOT std::cout << std::endl << "-- Testing xDOT..."; ref_float_alpha = 0; ref_double_alpha = 0; for (std::size_t i=0; i(host_float_x), 2, 1, viennacl::linalg::host_based::detail::extract_raw_pointer(host_float_y), 3, 1); check(ref_float_alpha, host_float_alpha, eps_float); ViennaCLHostDdot(my_backend, ViennaCLInt(size/2), &host_double_alpha, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_x), 2, 1, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_y), 3, 1); check(ref_double_alpha, host_double_alpha, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASdot(my_backend, ViennaCLInt(size/2), &cuda_float_alpha, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_x), 2, 1, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_y), 3, 1); check(ref_float_alpha, cuda_float_alpha, eps_float); ViennaCLCUDADdot(my_backend, ViennaCLInt(size/2), &cuda_double_alpha, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_x), 2, 1, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_y), 3, 1); check(ref_double_alpha, cuda_double_alpha, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSdot(my_backend, ViennaCLInt(size/2), &opencl_float_alpha, viennacl::traits::opencl_handle(opencl_float_x).get(), 2, 1, viennacl::traits::opencl_handle(opencl_float_y).get(), 3, 1); check(ref_float_alpha, opencl_float_alpha, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDdot(my_backend, ViennaCLInt(size/2), &opencl_double_alpha, viennacl::traits::opencl_handle(*opencl_double_x).get(), 2, 1, viennacl::traits::opencl_handle(*opencl_double_y).get(), 3, 1); 
check(ref_double_alpha, opencl_double_alpha, eps_double); } #endif // NRM2 std::cout << std::endl << "-- Testing xNRM2..."; ref_float_alpha = 0; ref_double_alpha = 0; for (std::size_t i=0; i(host_float_x), 1, 2); check(ref_float_alpha, host_float_alpha, eps_float); ViennaCLHostDnrm2(my_backend, ViennaCLInt(size/3), &host_double_alpha, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_x), 1, 2); check(ref_double_alpha, host_double_alpha, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASnrm2(my_backend, ViennaCLInt(size/3), &cuda_float_alpha, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_x), 1, 2); check(ref_float_alpha, cuda_float_alpha, eps_float); ViennaCLCUDADnrm2(my_backend, ViennaCLInt(size/3), &cuda_double_alpha, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_x), 1, 2); check(ref_double_alpha, cuda_double_alpha, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSnrm2(my_backend, ViennaCLInt(size/3), &opencl_float_alpha, viennacl::traits::opencl_handle(opencl_float_x).get(), 1, 2); check(ref_float_alpha, opencl_float_alpha, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDnrm2(my_backend, ViennaCLInt(size/3), &opencl_double_alpha, viennacl::traits::opencl_handle(*opencl_double_x).get(), 1, 2); check(ref_double_alpha, opencl_double_alpha, eps_double); } #endif // ROT std::cout << std::endl << "-- Testing xROT..."; for (std::size_t i=0; i(host_float_x), 2, 3, viennacl::linalg::host_based::detail::extract_raw_pointer(host_float_y), 1, 2, 0.6f, 0.8f); check(ref_float_x, host_float_x, eps_float); check(ref_float_y, host_float_y, eps_float); ViennaCLHostDrot(my_backend, ViennaCLInt(size/4), viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_x), 2, 3, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_y), 1, 2, 0.6, 0.8); check(ref_double_x, host_double_x, eps_double); 
check(ref_double_y, host_double_y, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASrot(my_backend, ViennaCLInt(size/4), viennacl::linalg::cuda::detail::cuda_arg(cuda_float_x), 2, 3, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_y), 1, 2, 0.6f, 0.8f); check(ref_float_x, cuda_float_x, eps_float); check(ref_float_y, cuda_float_y, eps_float); ViennaCLCUDADrot(my_backend, ViennaCLInt(size/4), viennacl::linalg::cuda::detail::cuda_arg(cuda_double_x), 2, 3, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_y), 1, 2, 0.6, 0.8); check(ref_double_x, cuda_double_x, eps_double); check(ref_double_y, cuda_double_y, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSrot(my_backend, ViennaCLInt(size/4), viennacl::traits::opencl_handle(opencl_float_x).get(), 2, 3, viennacl::traits::opencl_handle(opencl_float_y).get(), 1, 2, 0.6f, 0.8f); check(ref_float_x, opencl_float_x, eps_float); check(ref_float_y, opencl_float_y, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDrot(my_backend, ViennaCLInt(size/4), viennacl::traits::opencl_handle(*opencl_double_x).get(), 2, 3, viennacl::traits::opencl_handle(*opencl_double_y).get(), 1, 2, 0.6, 0.8); check(ref_double_x, *opencl_double_x, eps_double); check(ref_double_y, *opencl_double_y, eps_double); } #endif // SCAL std::cout << std::endl << "-- Testing xSCAL..."; for (std::size_t i=0; i(host_float_x), 1, 3); check(ref_float_x, host_float_x, eps_float); ViennaCLHostDscal(my_backend, ViennaCLInt(size/4), 2.0, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_x), 1, 3); check(ref_double_x, host_double_x, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASscal(my_backend, ViennaCLInt(size/4), 2.0f, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_x), 1, 3); check(ref_float_x, cuda_float_x, eps_float); ViennaCLCUDADscal(my_backend, ViennaCLInt(size/4), 2.0, 
viennacl::linalg::cuda::detail::cuda_arg(cuda_double_x), 1, 3); check(ref_double_x, cuda_double_x, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSscal(my_backend, ViennaCLInt(size/4), 2.0f, viennacl::traits::opencl_handle(opencl_float_x).get(), 1, 3); check(ref_float_x, opencl_float_x, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDscal(my_backend, ViennaCLInt(size/4), 2.0, viennacl::traits::opencl_handle(*opencl_double_x).get(), 1, 3); check(ref_double_x, *opencl_double_x, eps_double); } #endif // SWAP std::cout << std::endl << "-- Testing xSWAP..."; for (std::size_t i=0; i(host_float_x), 2, 2, viennacl::linalg::host_based::detail::extract_raw_pointer(host_float_y), 1, 2); check(ref_float_y, host_float_y, eps_float); ViennaCLHostDswap(my_backend, ViennaCLInt(size/3), viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_x), 2, 2, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_y), 1, 2); check(ref_double_y, host_double_y, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASswap(my_backend, ViennaCLInt(size/3), viennacl::linalg::cuda::detail::cuda_arg(cuda_float_x), 2, 2, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_y), 1, 2); check(ref_float_y, cuda_float_y, eps_float); ViennaCLCUDADswap(my_backend, ViennaCLInt(size/3), viennacl::linalg::cuda::detail::cuda_arg(cuda_double_x), 2, 2, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_y), 1, 2); check(ref_double_y, cuda_double_y, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSswap(my_backend, ViennaCLInt(size/3), viennacl::traits::opencl_handle(opencl_float_x).get(), 2, 2, viennacl::traits::opencl_handle(opencl_float_y).get(), 1, 2); check(ref_float_y, opencl_float_y, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDswap(my_backend, ViennaCLInt(size/3), 
viennacl::traits::opencl_handle(*opencl_double_x).get(), 2, 2, viennacl::traits::opencl_handle(*opencl_double_y).get(), 1, 2); check(ref_double_y, *opencl_double_y, eps_double); } #endif // IAMAX std::cout << std::endl << "-- Testing IxASUM..."; ViennaCLInt ref_index = 0; ref_float_alpha = 0; for (std::size_t i=0; i std::fabs(ref_float_alpha)) { ref_index = ViennaCLInt(i); ref_float_alpha = std::fabs(ref_float_x[0 + 2*i]); } } std::cout << std::endl << "Host: "; ViennaCLInt idx = 0; ViennaCLHostiSamax(my_backend, ViennaCLInt(size/3), &idx, viennacl::linalg::host_based::detail::extract_raw_pointer(host_float_x), 0, 2); check(static_cast(ref_index), static_cast(idx), eps_float); idx = 0; ViennaCLHostiDamax(my_backend, ViennaCLInt(size/3), &idx, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_x), 0, 2); check(ref_index, idx, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; idx = 0; ViennaCLCUDAiSamax(my_backend, ViennaCLInt(size/3), &idx, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_x), 0, 2); check(ref_float_x[2*ref_index], ref_float_x[2*idx], eps_float); idx = 0; ViennaCLCUDAiDamax(my_backend, ViennaCLInt(size/3), &idx, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_x), 0, 2); check(ref_double_x[2*ref_index], ref_double_x[2*idx], eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; idx = 0; ViennaCLOpenCLiSamax(my_backend, ViennaCLInt(size/3), &idx, viennacl::traits::opencl_handle(opencl_float_x).get(), 0, 2); check(ref_float_x[2*ref_index], ref_float_x[2*idx], eps_float); idx = 0; if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLiDamax(my_backend, ViennaCLInt(size/3), &idx, viennacl::traits::opencl_handle(*opencl_double_x).get(), 0, 2); check(ref_double_x[2*ref_index], ref_double_x[2*idx], eps_double); } #endif #ifdef VIENNACL_WITH_OPENCL //cleanup if( viennacl::ocl::current_device().double_support() ) { delete opencl_double_x; delete opencl_double_y; 
} #endif ViennaCLBackendDestroy(&my_backend); // // That's it. // std::cout << std::endl << "!!!! TEST COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/external_1.cpp000644 001750 001750 00000005323 12267307531 020470 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // A check for the absence of external linkage (otherwise, library is not truly 'header-only') // //#define VIENNACL_WITH_EIGEN #define VIENNACL_WITH_UBLAS // // *** System // #include // // *** ViennaCL // #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/compressed_matrix.hpp" #include "viennacl/coordinate_matrix.hpp" #include "viennacl/ell_matrix.hpp" #include "viennacl/hyb_matrix.hpp" #ifdef VIENNACL_WITH_OPENCL #include "viennacl/circulant_matrix.hpp" #include "viennacl/hankel_matrix.hpp" #include "viennacl/toeplitz_matrix.hpp" #include "viennacl/vandermonde_matrix.hpp" #endif #include "viennacl/linalg/ilu.hpp" #include "viennacl/linalg/row_scaling.hpp" #include "viennacl/linalg/jacobi_precond.hpp" #include "viennacl/linalg/cg.hpp" #include "viennacl/linalg/bicgstab.hpp" #include "viennacl/linalg/gmres.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/qr.hpp" #include "viennacl/misc/bandwidth_reduction.hpp" #ifdef VIENNACL_WITH_OPENCL #include "viennacl/linalg/amg.hpp" #include "viennacl/linalg/spai.hpp" 
#include "viennacl/linalg/svd.hpp" #include "viennacl/fft.hpp" #include "viennacl/generator/generate.hpp" #endif #include "viennacl/io/matrix_market.hpp" #include "viennacl/scheduler/execute.hpp" //defined in external_2.cpp void other_func(); // // ------------------------------------------------------------- // int main() { typedef float NumericType; //doing nothing but instantiating a few types viennacl::scalar s; viennacl::vector v(10); viennacl::matrix m(10, 10); viennacl::compressed_matrix compr(10, 10); viennacl::coordinate_matrix coord(10, 10); //this is the external linkage check: other_func(); std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/matrix_row_int.cu000644 001750 001750 00000003525 12267307531 021322 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include "matrix_int.hpp" int main (int, const char **) { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Matrix operations, row-major, integers " << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: int" << std::endl; std::cout << " --- row-major ---" << std::endl; if (run_test() != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: long" << std::endl; std::cout << " --- row-major ---" << std::endl; if (run_test() != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/vector_int.cu000644 001750 001750 00000150303 12267307531 020426 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // *** System // #include #include // // *** Boost // #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL #define VIENNACL_WITH_UBLAS 1 #include "viennacl/vector.hpp" #include "viennacl/vector_proxy.hpp" #include "viennacl/linalg/inner_prod.hpp" #include "viennacl/linalg/norm_1.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/norm_inf.hpp" #include "Random.hpp" using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType const & s1, ScalarType const & s2) { viennacl::backend::finish(); return s1 - s2; } // // ------------------------------------------------------------- // template ScalarType diff(ScalarType const & s1, viennacl::scalar const & s2) { viennacl::backend::finish(); return s1 - s2; } // // ------------------------------------------------------------- // template ScalarType diff(ScalarType const & s1, viennacl::entry_proxy const & s2) { viennacl::backend::finish(); return s1 - s2; } // // ------------------------------------------------------------- // template ScalarType diff(ublas::vector const & v1, VCLVectorType const & v2) { ublas::vector v2_cpu(v2.size()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin()); for (unsigned int i=0;i int check(T1 const & t1, T2 const & t2) { int retval = EXIT_SUCCESS; if (diff(t1, t2) != 0) { std::cout << "# Error! 
Difference: " << std::abs(diff(t1, t2)) << std::endl; retval = EXIT_FAILURE; } return retval; } // // ------------------------------------------------------------- // template< typename NumericT, typename UblasVectorType, typename ViennaCLVectorType1, typename ViennaCLVectorType2 > int test(UblasVectorType & ublas_v1, UblasVectorType & ublas_v2, ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2) { int retval = EXIT_SUCCESS; NumericT cpu_result = 42; viennacl::scalar gpu_result = 43; // // Initializer: // std::cout << "Checking for zero_vector initializer..." << std::endl; //ublas_v1 = ublas::zero_vector(ublas_v1.size()); for (std::size_t i=0; i(vcl_v1.size()); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Checking for scalar_vector initializer..." << std::endl; //ublas_v1 = ublas::scalar_vector(ublas_v1.size(), cpu_result); for (std::size_t i=0; i(vcl_v1.size(), cpu_result); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; //ublas_v1 = ublas::scalar_vector(ublas_v1.size(), gpu_result); for (std::size_t i=0; i(vcl_v1.size(), gpu_result); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Checking for unit_vector initializer..." 
<< std::endl; //ublas_v1 = ublas::unit_vector(ublas_v1.size(), 5); for (std::size_t i=0; i(vcl_v1.size(), 5); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; for (std::size_t i=0; i(cpu_index), static_cast(gpu_index)) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- cpu_result = ublas_v1[index_norm_inf(ublas_v1)]; gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1)]; if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_result = ublas_v1[index_norm_inf(ublas_v1 + ublas_v2)]; gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1 + vcl_v2)]; if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; // // Plane rotation and assignments // // -------------------------------------------------------------------------- ublas::vector x = ublas_v1; ublas::vector y = ublas_v2; ublas::vector t = ublas_v1; t.assign ( NumericT(1) * x + NumericT(2) * y), y.assign (- NumericT(2) * x + NumericT(1) * y), x.assign (t); viennacl::linalg::plane_rotation(vcl_v1, vcl_v2, NumericT(1), NumericT(2)); if (check(x, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(y, vcl_v2) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing assignments..." << std::endl; NumericT val = static_cast(1); for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = val; for (size_t i=0; i < vcl_v1.size(); ++i) vcl_v1(i) = val; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // multiplication and division of vectors by scalars // std::cout << "Testing scaling with CPU scalar..." << std::endl; NumericT alpha = static_cast(3); viennacl::scalar gpu_alpha = alpha; ublas_v1 *= alpha; vcl_v1 *= alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing scaling with GPU scalar..." 
<< std::endl; ublas_v1 *= alpha; vcl_v1 *= gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; NumericT beta = static_cast(2); viennacl::scalar gpu_beta = beta; std::cout << "Testing shrinking with CPU scalar..." << std::endl; ublas_v1 /= beta; vcl_v1 /= beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing shrinking with GPU scalar..." << std::endl; ublas_v1 /= beta; vcl_v1 /= gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // add and inplace_add of vectors // for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); //resync viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing add on vector..." << std::endl; std::cout << "Checking for successful copy..." << std::endl; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_v2, vcl_v2) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 = ublas_v1 + ublas_v2; vcl_v1 = vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing add on vector with flipsign..." << std::endl; ublas_v1 = - ublas_v1 + ublas_v2; vcl_v1 = - vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace-add on vector..." << std::endl; ublas_v1 += ublas_v2; vcl_v1 += vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // subtract and inplace_subtract of vectors // std::cout << "Testing sub on vector..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - ublas_v2; vcl_v1 = vcl_v1 - vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace-sub on vector..." 
<< std::endl; ublas_v1 -= ublas_v2; vcl_v1 -= vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // multiply-add // std::cout << "Testing multiply-add on vector with CPU scalar (right)..." << std::endl; for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + alpha * ublas_v2; vcl_v1 = vcl_v1 + alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with CPU scalar (left)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 + ublas_v2; vcl_v1 = alpha * vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with CPU scalar (both)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 + beta * ublas_v2; vcl_v1 = alpha * vcl_v1 + beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with CPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v2; vcl_v1 += alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (right)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + alpha * ublas_v2; vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (left)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + alpha * ublas_v2; vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (both)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 + beta * ublas_v2; vcl_v1 = gpu_alpha * vcl_v1 + gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v1 + beta * ublas_v2; vcl_v1 += gpu_alpha * vcl_v1 + gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, subtracting)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v1 - beta * ublas_v2; vcl_v1 += gpu_alpha * vcl_v1 - gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with GPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v2; vcl_v1 += gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // division-add // std::cout << "Testing division-add on vector with CPU scalar (right)..." << std::endl; for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + ublas_v2 / alpha; vcl_v1 = vcl_v1 + vcl_v2 / alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with CPU scalar (left)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha + ublas_v2; vcl_v1 = vcl_v1 / alpha + vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with CPU scalar (both)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha + ublas_v2 / beta; vcl_v1 = vcl_v1 / alpha + vcl_v2 / beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-multiply-add on vector with CPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha + ublas_v2 * beta; vcl_v1 = vcl_v1 / alpha + vcl_v2 * beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-division-add on vector with CPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 * alpha + ublas_v2 / beta; vcl_v1 = vcl_v1 * alpha + vcl_v2 / beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with CPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v2 / alpha; vcl_v1 += vcl_v2 / alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with GPU scalar (right)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + ublas_v2 / alpha; vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with GPU scalar (left)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + ublas_v2 / alpha; vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with GPU scalar (both)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha + ublas_v2 / beta; vcl_v1 = vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with GPU scalar (both, adding)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v1 / alpha + ublas_v2 / beta; vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with GPU scalar (both, subtracting)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v1 / alpha - ublas_v2 / beta; vcl_v1 += vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-multiply-add on vector with GPU scalar (adding)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v1 / alpha + ublas_v2 * beta; vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-division-add on vector with GPU scalar (subtracting)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v1 * alpha - ublas_v2 / beta; vcl_v1 += vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with GPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v2 * alpha; vcl_v1 += vcl_v2 * gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // multiply-subtract // std::cout << "Testing multiply-subtract on vector with CPU scalar (right)..." << std::endl; for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - alpha * ublas_v2; vcl_v1 = vcl_v1 - alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-subtract on vector with CPU scalar (left)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 - ublas_v2; vcl_v1 = alpha * vcl_v1 - vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-subtract on vector with CPU scalar (both)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 - beta * ublas_v2; vcl_v1 = alpha * vcl_v1 - beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-subtract on vector with CPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= alpha * ublas_v2; vcl_v1 -= alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-subtract on vector with GPU scalar (right)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - alpha * ublas_v2; vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-subtract on vector with GPU scalar (left)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - alpha * ublas_v2; vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-subtract on vector with GPU scalar (both)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 - beta * ublas_v2; vcl_v1 = gpu_alpha * vcl_v1 - gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-subtract on vector with GPU scalar (both, adding)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= alpha * ublas_v1 + beta * ublas_v2; vcl_v1 -= gpu_alpha * vcl_v1 + gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-subtract on vector with GPU scalar (both, subtracting)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= alpha * ublas_v1 - beta * ublas_v2; vcl_v1 -= gpu_alpha * vcl_v1 - gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-subtract on vector with GPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= alpha * ublas_v2; vcl_v1 -= gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // division-subtract // std::cout << "Testing division-subtract on vector with CPU scalar (right)..." 
<< std::endl; for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - ublas_v2 / alpha; vcl_v1 = vcl_v1 - vcl_v2 / alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-subtract on vector with CPU scalar (left)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha - ublas_v2; vcl_v1 = vcl_v1 / alpha - vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-subtract on vector with CPU scalar (both)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha - ublas_v2 / alpha; vcl_v1 = vcl_v1 / alpha - vcl_v2 / alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-subtract on vector with CPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v2 / alpha; vcl_v1 -= vcl_v2 / alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-subtract on vector with GPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v2 / alpha; vcl_v1 -= vcl_v2 / gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-subtract on vector with GPU scalar (right)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - ublas_v2 / alpha; vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-subtract on vector with GPU scalar (left)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - ublas_v2 / alpha; vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-subtract on vector with GPU scalar (both)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha - ublas_v2 / beta; vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-subtract on vector with GPU scalar (both, adding)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v1 / alpha + ublas_v2 / beta; vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-subtract on vector with GPU scalar (both, subtracting)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v1 / alpha - ublas_v2 / beta; vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-division-subtract on vector with GPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 * alpha - ublas_v2 / beta; vcl_v1 = vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-multiply-subtract on vector with GPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha - ublas_v2 * beta; vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-division-subtract on vector with GPU scalar (adding)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v1 * alpha + ublas_v2 / beta; vcl_v1 -= vcl_v1 * gpu_alpha + vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-multiply-subtract on vector with GPU scalar (adding)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v1 / alpha + ublas_v2 * beta; vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-division-subtract on vector with GPU scalar (subtracting)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v1 * alpha - ublas_v2 / beta; vcl_v1 -= vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-multiply-subtract on vector with GPU scalar (subtracting)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v1 / alpha - ublas_v2 * beta; vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-subtract on vector with GPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= alpha * ublas_v2; vcl_v1 -= gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // More complicated expressions (for ensuring the operator overloads work correctly) // for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing three vector additions..." 
<< std::endl; ublas_v1 = ublas_v2 + ublas_v1 + ublas_v2; vcl_v1 = vcl_v2 + vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing complicated vector expression with CPU scalar..." << std::endl; ublas_v1 = beta * (ublas_v1 - alpha * ublas_v2); vcl_v1 = beta * (vcl_v1 - alpha * vcl_v2); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing complicated vector expression with GPU scalar..." << std::endl; ublas_v1 = beta * (ublas_v1 - alpha * ublas_v2); vcl_v1 = gpu_beta * (vcl_v1 - gpu_alpha * vcl_v2); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing swap..." 
<< std::endl; swap(ublas_v1, ublas_v2); swap(vcl_v1, vcl_v2); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- for (std::size_t i=0; i int test() { int retval = EXIT_SUCCESS; std::size_t size = 12345; std::cout << "Running tests for vector of size " << size << std::endl; // // Set up UBLAS objects // ublas::vector ublas_full_vec(size); ublas::vector ublas_full_vec2(ublas_full_vec.size()); for (std::size_t i=0; i > ublas_range_vec(ublas_full_vec, r1); ublas::vector_range< ublas::vector > ublas_range_vec2(ublas_full_vec2, r2); ublas::slice s1( ublas_full_vec.size() / 4, 3, ublas_full_vec.size() / 4); ublas::slice s2(2 * ublas_full_vec2.size() / 4, 2, ublas_full_vec2.size() / 4); ublas::vector_slice< ublas::vector > ublas_slice_vec(ublas_full_vec, s1); ublas::vector_slice< ublas::vector > ublas_slice_vec2(ublas_full_vec2, s2); // // Set up ViennaCL objects // viennacl::vector vcl_full_vec(ublas_full_vec.size()); viennacl::vector vcl_full_vec2(ublas_full_vec2.size()); viennacl::fast_copy(ublas_full_vec.begin(), ublas_full_vec.end(), vcl_full_vec.begin()); viennacl::copy(ublas_full_vec2.begin(), ublas_full_vec2.end(), vcl_full_vec2.begin()); viennacl::range vcl_r1( vcl_full_vec.size() / 4, 2 * vcl_full_vec.size() / 4); viennacl::range vcl_r2(2 * vcl_full_vec2.size() / 4, 3 * vcl_full_vec2.size() / 4); viennacl::vector_range< viennacl::vector > vcl_range_vec(vcl_full_vec, vcl_r1); viennacl::vector_range< viennacl::vector > vcl_range_vec2(vcl_full_vec2, vcl_r2); { viennacl::vector vcl_short_vec(vcl_range_vec); viennacl::vector vcl_short_vec2 = vcl_range_vec2; ublas::vector ublas_short_vec(ublas_range_vec); ublas::vector ublas_short_vec2(ublas_range_vec2); std::cout << "Testing creation of vectors from range..." 
<< std::endl; if (check(ublas_short_vec, vcl_short_vec) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_short_vec2, vcl_short_vec2) != EXIT_SUCCESS) return EXIT_FAILURE; } viennacl::slice vcl_s1( vcl_full_vec.size() / 4, 3, vcl_full_vec.size() / 4); viennacl::slice vcl_s2(2 * vcl_full_vec2.size() / 4, 2, vcl_full_vec2.size() / 4); viennacl::vector_slice< viennacl::vector > vcl_slice_vec(vcl_full_vec, vcl_s1); viennacl::vector_slice< viennacl::vector > vcl_slice_vec2(vcl_full_vec2, vcl_s2); viennacl::vector vcl_short_vec(vcl_slice_vec); viennacl::vector vcl_short_vec2 = vcl_slice_vec2; ublas::vector ublas_short_vec(ublas_slice_vec); ublas::vector ublas_short_vec2(ublas_slice_vec2); std::cout << "Testing creation of vectors from slice..." << std::endl; if (check(ublas_short_vec, vcl_short_vec) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_short_vec2, vcl_short_vec2) != EXIT_SUCCESS) return EXIT_FAILURE; // // Now start running tests for vectors, ranges and slices: // std::cout << " ** vcl_v1 = vector, vcl_v2 = vector **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_short_vec, vcl_short_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = vector, vcl_v2 = range **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_short_vec, vcl_range_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = vector, vcl_v2 = slice **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_short_vec, vcl_slice_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " ** vcl_v1 = range, vcl_v2 = vector **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_range_vec, vcl_short_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = range, vcl_v2 = range **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_range_vec, vcl_range_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; 
std::cout << " ** vcl_v1 = range, vcl_v2 = slice **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_range_vec, vcl_slice_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " ** vcl_v1 = slice, vcl_v2 = vector **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_slice_vec, vcl_short_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = slice, vcl_v2 = range **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_slice_vec, vcl_range_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = slice, vcl_v2 = slice **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_slice_vec, vcl_slice_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; return EXIT_SUCCESS; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Vector with Integer types" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: int" << std::endl; retval = test(); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: long" << std::endl; retval = test(); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; 
} std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/matrix_vector_int.cpp000644 001750 001750 00000071206 12267307531 022171 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // *** System // #include // // *** Boost // #include #include #include #include #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL #define VIENNACL_WITH_UBLAS 1 #include "viennacl/scalar.hpp" #include "viennacl/matrix.hpp" #include "viennacl/vector.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/lu.hpp" #include "examples/tutorial/Random.hpp" // // ------------------------------------------------------------- // using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType & s1, viennacl::scalar & s2) { viennacl::backend::finish(); if (s1 != s2) return 1; return 0; } template ScalarType diff(ublas::vector const & v1, VCLVectorType const & v2) { ublas::vector v2_cpu(v2.size()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(v2.begin(), 
v2.end(), v2_cpu.begin()); for (unsigned int i=0;i ScalarType diff(ublas::matrix const & mat1, VCLMatrixType const & mat2) { ublas::matrix mat2_cpu(mat2.size1(), mat2.size2()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(mat2, mat2_cpu); for (unsigned int i = 0; i < mat2_cpu.size1(); ++i) { for (unsigned int j = 0; j < mat2_cpu.size2(); ++j) { if (mat2_cpu(i,j) != mat1(i,j)) return 1; } } //std::cout << ret << std::endl; return 0; } // // ------------------------------------------------------------- // template int test_prod_rank1(UblasMatrixType & ublas_m1, UblasVectorType & ublas_v1, UblasVectorType & ublas_v2, VCLMatrixType & vcl_m1, VCLVectorType1 & vcl_v1, VCLVectorType2 & vcl_v2) { int retval = EXIT_SUCCESS; // sync data: ublas_v1 = ublas::scalar_vector(ublas_v1.size(), NumericT(2)); ublas_v2 = ublas::scalar_vector(ublas_v2.size(), NumericT(3)); viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); viennacl::copy(ublas_m1, vcl_m1); // -------------------------------------------------------------------------- std::cout << "Rank 1 update" << std::endl; ublas_m1 += ublas::outer_prod(ublas_v1, ublas_v2); vcl_m1 += viennacl::linalg::outer_prod(vcl_v1, vcl_v2); if( diff(ublas_m1, vcl_m1) != 0 ) { std::cout << "# Error at operation: rank 1 update" << std::endl; std::cout << " diff: " << diff(ublas_m1, vcl_m1) << std::endl; return EXIT_FAILURE; } // -------------------------------------------------------------------------- std::cout << "Scaled rank 1 update - CPU Scalar" << std::endl; ublas_m1 += NumericT(4) * ublas::outer_prod(ublas_v1, ublas_v2); vcl_m1 += NumericT(2) * viennacl::linalg::outer_prod(vcl_v1, vcl_v2); vcl_m1 += viennacl::linalg::outer_prod(vcl_v1, vcl_v2) * NumericT(2); //check proper compilation if( diff(ublas_m1, vcl_m1) != 0 ) { std::cout << "# Error at operation: scaled rank 1 update - CPU Scalar" 
<< std::endl; std::cout << " diff: " << diff(ublas_m1, vcl_m1) << std::endl; return EXIT_FAILURE; } // -------------------------------------------------------------------------- std::cout << "Scaled rank 1 update - GPU Scalar" << std::endl; ublas_m1 += NumericT(4) * ublas::outer_prod(ublas_v1, ublas_v2); vcl_m1 += viennacl::scalar(2) * viennacl::linalg::outer_prod(vcl_v1, vcl_v2); vcl_m1 += viennacl::linalg::outer_prod(vcl_v1, vcl_v2) * viennacl::scalar(2); //check proper compilation if( diff(ublas_m1, vcl_m1) != 0 ) { std::cout << "# Error at operation: scaled rank 1 update - GPU Scalar" << std::endl; std::cout << " diff: " << diff(ublas_m1, vcl_m1) << std::endl; return EXIT_FAILURE; } //reset vcl_matrix: viennacl::copy(ublas_m1, vcl_m1); // -------------------------------------------------------------------------- std::cout << "Matrix-Vector product" << std::endl; ublas_v1 = viennacl::linalg::prod(ublas_m1, ublas_v2); vcl_v1 = viennacl::linalg::prod(vcl_m1, vcl_v2); if( diff(ublas_v1, vcl_v1) != 0 ) { std::cout << "# Error at operation: matrix-vector product" << std::endl; std::cout << " diff: " << diff(ublas_v1, vcl_v1) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- std::cout << "Matrix-Vector product with scaled add" << std::endl; NumericT alpha = static_cast(2); NumericT beta = static_cast(3); viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * viennacl::linalg::prod(ublas_m1, ublas_v2) + beta * ublas_v1; vcl_v1 = alpha * viennacl::linalg::prod(vcl_m1, vcl_v2) + beta * vcl_v1; if( diff(ublas_v1, vcl_v1) != 0 ) { std::cout << "# Error at operation: matrix-vector product with scaled additions" << std::endl; std::cout << " diff: " << diff(ublas_v1, vcl_v1) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- 
viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Transposed Matrix-Vector product" << std::endl; ublas_v2 = alpha * viennacl::linalg::prod(trans(ublas_m1), ublas_v1); vcl_v2 = alpha * viennacl::linalg::prod(trans(vcl_m1), vcl_v1); if( diff(ublas_v2, vcl_v2) != 0 ) { std::cout << "# Error at operation: transposed matrix-vector product" << std::endl; std::cout << " diff: " << diff(ublas_v2, vcl_v2) << std::endl; retval = EXIT_FAILURE; } std::cout << "Transposed Matrix-Vector product with scaled add" << std::endl; ublas_v2 = alpha * viennacl::linalg::prod(trans(ublas_m1), ublas_v1) + beta * ublas_v2; vcl_v2 = alpha * viennacl::linalg::prod(trans(vcl_m1), vcl_v1) + beta * vcl_v2; if( diff(ublas_v2, vcl_v2) != 0 ) { std::cout << "# Error at operation: transposed matrix-vector product with scaled additions" << std::endl; std::cout << " diff: " << diff(ublas_v2, vcl_v2) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- return retval; } // // ------------------------------------------------------------- // template< typename NumericT, typename F> int test() { int retval = EXIT_SUCCESS; std::size_t num_rows = 141; std::size_t num_cols = 103; // -------------------------------------------------------------------------- ublas::vector ublas_v1(num_rows); for (std::size_t i = 0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas::vector ublas_v2 = ublas::scalar_vector(num_cols, NumericT(3)); ublas::matrix ublas_m1(ublas_v1.size(), ublas_v2.size()); ublas::matrix ublas_m2(ublas_v1.size(), ublas_v1.size()); for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); for (std::size_t i = 0; i < ublas_m2.size1(); ++i) for (std::size_t j = 0; j < ublas_m2.size2(); ++j) ublas_m2(i,j) = NumericT(j - i*j + i); viennacl::vector 
vcl_v1_native(ublas_v1.size()); viennacl::vector vcl_v1_large(4 * ublas_v1.size()); viennacl::vector_range< viennacl::vector > vcl_v1_range(vcl_v1_large, viennacl::range(3, ublas_v1.size() + 3)); viennacl::vector_slice< viennacl::vector > vcl_v1_slice(vcl_v1_large, viennacl::slice(2, 3, ublas_v1.size())); viennacl::vector vcl_v2_native(ublas_v2.size()); viennacl::vector vcl_v2_large(4 * ublas_v2.size()); viennacl::vector_range< viennacl::vector > vcl_v2_range(vcl_v2_large, viennacl::range(8, ublas_v2.size() + 8)); viennacl::vector_slice< viennacl::vector > vcl_v2_slice(vcl_v2_large, viennacl::slice(6, 2, ublas_v2.size())); viennacl::matrix vcl_m1_native(ublas_m1.size1(), ublas_m1.size2()); viennacl::matrix vcl_m1_large(4 * ublas_m1.size1(), 4 * ublas_m1.size2()); viennacl::matrix_range< viennacl::matrix > vcl_m1_range(vcl_m1_large, viennacl::range(8, ublas_m1.size1() + 8), viennacl::range(ublas_m1.size2(), 2 * ublas_m1.size2()) ); viennacl::matrix_slice< viennacl::matrix > vcl_m1_slice(vcl_m1_large, viennacl::slice(6, 2, ublas_m1.size1()), viennacl::slice(ublas_m1.size2(), 2, ublas_m1.size2()) ); viennacl::matrix vcl_m2_native(ublas_m2.size1(), ublas_m2.size2()); viennacl::matrix vcl_m2_large(4 * ublas_m2.size1(), 4 * ublas_m2.size2()); viennacl::matrix_range< viennacl::matrix > vcl_m2_range(vcl_m2_large, viennacl::range(8, ublas_m2.size1() + 8), viennacl::range(ublas_m2.size2(), 2 * ublas_m2.size2()) ); viennacl::matrix_slice< viennacl::matrix > vcl_m2_slice(vcl_m2_large, viennacl::slice(6, 2, ublas_m2.size1()), viennacl::slice(ublas_m2.size2(), 2, ublas_m2.size2()) ); // // Run a bunch of tests for rank-1-updates, matrix-vector products // std::cout << "------------ Testing rank-1-updates and matrix-vector products ------------------" << std::endl; std::cout << "* m = full, v1 = full, v2 = full" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_native, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- 
FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = full, v1 = full, v2 = range" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_native, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = full, v1 = full, v2 = slice" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_native, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); // v1 = range std::cout << "* m = full, v1 = range, v2 = full" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_range, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = full, v1 = range, v2 = range" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_range, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = full, v1 = range, v2 = slice" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_range, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); // v1 = slice std::cout << "* m = full, v1 = slice, v2 = full" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_slice, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = full, v1 = slice, v2 = range" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_slice, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = full, v1 = slice, v2 = slice" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_slice, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); ///////////////////////////// matrix_range std::cout << "* m = range, v1 = full, v2 = full" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_native, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = range, v1 = full, v2 = range" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_native, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = range, v1 = full, v2 = slice" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_native, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); // v1 = range std::cout << "* m = range, v1 = range, v2 = full" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_range, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = range, v1 = range, v2 = range" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_range, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = range, v1 = range, v2 = slice" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_range, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); // v1 = slice std::cout << "* m = range, v1 = slice, v2 = full" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_slice, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = range, v1 = slice, v2 = range" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_slice, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = range, v1 = slice, v2 = slice" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_slice, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); ///////////////////////////// matrix_slice std::cout << "* m = slice, v1 = full, v2 = full" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_native, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = slice, v1 = full, v2 = range" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_native, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = slice, v1 = full, v2 = slice" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_native, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); // v1 = range std::cout << "* m = slice, v1 = range, v2 = full" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_range, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = slice, v1 = range, v2 = range" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_range, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = slice, v1 = range, v2 = slice" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_range, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); // v1 = slice std::cout << "* m = slice, v1 = slice, v2 = full" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_slice, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = slice, v1 = slice, v2 = range" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_slice, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = NumericT(i+j); std::cout << "* m = slice, v1 = slice, v2 = slice" << std::endl; retval = test_prod_rank1(ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_slice, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; return retval; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Matrix" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef int NumericT; std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: int" << std::endl; std::cout << " layout: row-major" << std::endl; retval = test(); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << 
std::endl; { typedef int NumericT; std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: int" << std::endl; std::cout << " layout: column-major" << std::endl; retval = test(); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef long NumericT; std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: double" << std::endl; std::cout << " layout: row-major" << std::endl; retval = test(); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef long NumericT; std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: double" << std::endl; std::cout << " layout: column-major" << std::endl; retval = test(); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/scheduler_matrix_vector.cpp000644 001750 001750 00000105145 12267307531 023355 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // *** System // #include // // *** Boost // #include #include #include #include #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL #define VIENNACL_WITH_UBLAS 1 #include "viennacl/scalar.hpp" #include "viennacl/matrix.hpp" #include "viennacl/vector.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/lu.hpp" #include "examples/tutorial/Random.hpp" #include "viennacl/scheduler/execute.hpp" #include "viennacl/scheduler/io.hpp" // // ------------------------------------------------------------- // using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType & s1, viennacl::scalar & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } template ScalarType diff(ublas::vector const & v1, VCLVectorType const & v2) { ublas::vector v2_cpu(v2.size()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin()); for (unsigned int i=0;i 0 ) v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); else v2_cpu[i] = 0.0; } return norm_inf(v2_cpu); } template ScalarType diff(ublas::matrix const & mat1, VCLMatrixType const & mat2) { ublas::matrix mat2_cpu(mat2.size1(), mat2.size2()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(mat2, mat2_cpu); ScalarType ret = 0; ScalarType act = 0; for 
(unsigned int i = 0; i < mat2_cpu.size1(); ++i) { for (unsigned int j = 0; j < mat2_cpu.size2(); ++j) { act = std::fabs(mat2_cpu(i,j) - mat1(i,j)) / std::max( std::fabs(mat2_cpu(i, j)), std::fabs(mat1(i,j)) ); if (act > ret) ret = act; } } //std::cout << ret << std::endl; return ret; } // // ------------------------------------------------------------- // template int test_prod_rank1(Epsilon const & epsilon, UblasMatrixType & ublas_m1, UblasVectorType & ublas_v1, UblasVectorType & ublas_v2, VCLMatrixType & vcl_m1, VCLVectorType1 & vcl_v1, VCLVectorType2 & vcl_v2) { int retval = EXIT_SUCCESS; // sync data: viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); viennacl::copy(ublas_m1, vcl_m1); /* TODO: Add rank-1 operations here */ //reset vcl_matrix: viennacl::copy(ublas_m1, vcl_m1); // -------------------------------------------------------------------------- std::cout << "Matrix-Vector product" << std::endl; ublas_v1 = viennacl::linalg::prod(ublas_m1, ublas_v2); { viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::prod(vcl_m1, vcl_v2)); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Matrix-Vector product with inplace-add" << std::endl; ublas_v1 += viennacl::linalg::prod(ublas_m1, ublas_v2); { viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_inplace_add(), viennacl::linalg::prod(vcl_m1, vcl_v2)); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Matrix-Vector product 
with inplace-sub" << std::endl; ublas_v1 -= viennacl::linalg::prod(ublas_m1, ublas_v2); { viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_inplace_sub(), viennacl::linalg::prod(vcl_m1, vcl_v2)); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- /* std::cout << "Matrix-Vector product with scaled matrix" << std::endl; ublas_v1 = viennacl::linalg::prod(NumericT(2.0) * ublas_m1, ublas_v2); { viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::prod(NumericT(2.0) * vcl_m1, vcl_v2)); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; }*/ // -------------------------------------------------------------------------- std::cout << "Matrix-Vector product with scaled vector" << std::endl; /* ublas_v1 = viennacl::linalg::prod(ublas_m1, NumericT(2.0) * ublas_v2); { viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::prod(vcl_m1, NumericT(2.0) * vcl_v2)); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; }*/ // -------------------------------------------------------------------------- std::cout << "Matrix-Vector product with scaled matrix and scaled vector" << std::endl; /* ublas_v1 = viennacl::linalg::prod(NumericT(2.0) * ublas_m1, NumericT(2.0) * ublas_v2); { 
viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::prod(NumericT(2.0) * vcl_m1, NumericT(2.0) * vcl_v2)); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; }*/ // -------------------------------------------------------------------------- std::cout << "Matrix-Vector product with scaled add" << std::endl; NumericT alpha = static_cast(2.786); NumericT beta = static_cast(3.1415); viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * viennacl::linalg::prod(ublas_m1, ublas_v2) - beta * ublas_v1; { viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), alpha * viennacl::linalg::prod(vcl_m1, vcl_v2) - beta * vcl_v1); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Matrix-Vector product with scaled add, inplace-add" << std::endl; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * viennacl::linalg::prod(ublas_m1, ublas_v2) - beta * ublas_v1; { viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_inplace_add(), alpha * viennacl::linalg::prod(vcl_m1, vcl_v2) - beta * vcl_v1); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; 
retval = EXIT_FAILURE; } std::cout << "Matrix-Vector product with scaled add, inplace-sub" << std::endl; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= alpha * viennacl::linalg::prod(ublas_m1, ublas_v2) - beta * ublas_v1; { viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_inplace_sub(), alpha * viennacl::linalg::prod(vcl_m1, vcl_v2) - beta * vcl_v1); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Transposed Matrix-Vector product" << std::endl; ublas_v2 = viennacl::linalg::prod(trans(ublas_m1), ublas_v1); { viennacl::scheduler::statement my_statement(vcl_v2, viennacl::op_assign(), viennacl::linalg::prod(trans(vcl_m1), vcl_v1)); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(ublas_v2, vcl_v2)) > epsilon ) { std::cout << "# Error at operation: transposed matrix-vector product" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v2, vcl_v2)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Transposed Matrix-Vector product, inplace-add" << std::endl; ublas_v2 += viennacl::linalg::prod(trans(ublas_m1), ublas_v1); { viennacl::scheduler::statement my_statement(vcl_v2, viennacl::op_inplace_add(), viennacl::linalg::prod(trans(vcl_m1), vcl_v1)); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(ublas_v2, vcl_v2)) > epsilon ) { std::cout << "# Error at operation: transposed matrix-vector product" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v2, 
vcl_v2)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Transposed Matrix-Vector product, inplace-sub" << std::endl; ublas_v2 -= viennacl::linalg::prod(trans(ublas_m1), ublas_v1); { viennacl::scheduler::statement my_statement(vcl_v2, viennacl::op_inplace_sub(), viennacl::linalg::prod(trans(vcl_m1), vcl_v1)); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(ublas_v2, vcl_v2)) > epsilon ) { std::cout << "# Error at operation: transposed matrix-vector product" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v2, vcl_v2)) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- std::cout << "Transposed Matrix-Vector product with scaled add" << std::endl; ublas_v2 = alpha * viennacl::linalg::prod(trans(ublas_m1), ublas_v1) + beta * ublas_v2; { viennacl::scheduler::statement my_statement(vcl_v2, viennacl::op_assign(), alpha * viennacl::linalg::prod(trans(vcl_m1), vcl_v1) + beta * vcl_v2); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(ublas_v2, vcl_v2)) > epsilon ) { std::cout << "# Error at operation: transposed matrix-vector product with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v2, vcl_v2)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Transposed Matrix-Vector product with scaled add, inplace-add" << std::endl; ublas_v2 += alpha * viennacl::linalg::prod(trans(ublas_m1), ublas_v1) + beta * ublas_v2; { viennacl::scheduler::statement my_statement(vcl_v2, viennacl::op_inplace_add(), alpha * viennacl::linalg::prod(trans(vcl_m1), vcl_v1) + beta * vcl_v2); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(ublas_v2, vcl_v2)) > epsilon ) { std::cout << "# Error at operation: transposed matrix-vector product with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v2, vcl_v2)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Transposed Matrix-Vector product with scaled add, 
inplace-sub" << std::endl; ublas_v2 -= alpha * viennacl::linalg::prod(trans(ublas_m1), ublas_v1) + beta * ublas_v2; { viennacl::scheduler::statement my_statement(vcl_v2, viennacl::op_inplace_sub(), alpha * viennacl::linalg::prod(trans(vcl_m1), vcl_v1) + beta * vcl_v2); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(ublas_v2, vcl_v2)) > epsilon ) { std::cout << "# Error at operation: transposed matrix-vector product with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v2, vcl_v2)) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- return retval; } // // ------------------------------------------------------------- // template< typename NumericT, typename F, typename Epsilon > int test(Epsilon const& epsilon) { int retval = EXIT_SUCCESS; std::size_t num_rows = 141; std::size_t num_cols = 79; // -------------------------------------------------------------------------- ublas::vector ublas_v1(num_rows); for (std::size_t i = 0; i < ublas_v1.size(); ++i) ublas_v1(i) = random(); ublas::vector ublas_v2 = ublas::scalar_vector(num_cols, NumericT(3.1415)); ublas::matrix ublas_m1(ublas_v1.size(), ublas_v2.size()); for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = static_cast(0.1) * random(); ublas::matrix ublas_m2(ublas_v1.size(), ublas_v1.size()); for (std::size_t i = 0; i < ublas_m2.size1(); ++i) { for (std::size_t j = 0; j < ublas_m2.size2(); ++j) ublas_m2(i,j) = static_cast(-0.1) * random(); ublas_m2(i, i) = static_cast(2) + random(); } viennacl::vector vcl_v1_native(ublas_v1.size()); viennacl::vector vcl_v1_large(4 * ublas_v1.size()); viennacl::vector_range< viennacl::vector > vcl_v1_range(vcl_v1_large, viennacl::range(3, ublas_v1.size() + 3)); viennacl::vector_slice< viennacl::vector > vcl_v1_slice(vcl_v1_large, viennacl::slice(2, 3, ublas_v1.size())); viennacl::vector 
vcl_v2_native(ublas_v2.size()); viennacl::vector vcl_v2_large(4 * ublas_v2.size()); viennacl::vector_range< viennacl::vector > vcl_v2_range(vcl_v2_large, viennacl::range(8, ublas_v2.size() + 8)); viennacl::vector_slice< viennacl::vector > vcl_v2_slice(vcl_v2_large, viennacl::slice(6, 2, ublas_v2.size())); viennacl::matrix vcl_m1_native(ublas_m1.size1(), ublas_m1.size2()); viennacl::matrix vcl_m1_large(4 * ublas_m1.size1(), 4 * ublas_m1.size2()); viennacl::matrix_range< viennacl::matrix > vcl_m1_range(vcl_m1_large, viennacl::range(8, ublas_m1.size1() + 8), viennacl::range(ublas_m1.size2(), 2 * ublas_m1.size2()) ); viennacl::matrix_slice< viennacl::matrix > vcl_m1_slice(vcl_m1_large, viennacl::slice(6, 2, ublas_m1.size1()), viennacl::slice(ublas_m1.size2(), 2, ublas_m1.size2()) ); viennacl::matrix vcl_m2_native(ublas_m2.size1(), ublas_m2.size2()); viennacl::matrix vcl_m2_large(4 * ublas_m2.size1(), 4 * ublas_m2.size2()); viennacl::matrix_range< viennacl::matrix > vcl_m2_range(vcl_m2_large, viennacl::range(8, ublas_m2.size1() + 8), viennacl::range(ublas_m2.size2(), 2 * ublas_m2.size2()) ); viennacl::matrix_slice< viennacl::matrix > vcl_m2_slice(vcl_m2_large, viennacl::slice(6, 2, ublas_m2.size1()), viennacl::slice(ublas_m2.size2(), 2, ublas_m2.size2()) ); /* std::cout << "Matrix resizing (to larger)" << std::endl; matrix.resize(2*num_rows, 2*num_cols, true); for (unsigned int i = 0; i < matrix.size1(); ++i) { for (unsigned int j = (i epsilon ) { std::cout << "# Error at operation: matrix resize (to larger)" << std::endl; std::cout << " diff: " << std::fabs(diff(matrix, vcl_matrix)) << std::endl; return EXIT_FAILURE; } matrix(12, 14) = NumericT(1.9); matrix(19, 16) = NumericT(1.0); matrix (13, 15) = NumericT(-9); vcl_matrix(12, 14) = NumericT(1.9); vcl_matrix(19, 16) = NumericT(1.0); vcl_matrix (13, 15) = NumericT(-9); std::cout << "Matrix resizing (to smaller)" << std::endl; matrix.resize(result.size(), rhs.size(), true); vcl_matrix.resize(result.size(), rhs.size(), 
true); if( std::fabs(diff(matrix, vcl_matrix)) > epsilon ) { std::cout << "# Error at operation: matrix resize (to smaller)" << std::endl; std::cout << " diff: " << std::fabs(diff(matrix, vcl_matrix)) << std::endl; return EXIT_FAILURE; } */ // // Run a bunch of tests for rank-1-updates, matrix-vector products // std::cout << "------------ Testing rank-1-updates and matrix-vector products ------------------" << std::endl; std::cout << "* m = full, v1 = full, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_native, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = full, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_native, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = full, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_native, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; // v1 = range std::cout << "* m = full, v1 = range, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_range, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = range, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_range, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = range, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_range, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; // v1 = slice std::cout << "* m = full, v1 = slice, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_slice, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = slice, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_slice, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = slice, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_native, vcl_v1_slice, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; ///////////////////////////// matrix_range std::cout << "* m = range, v1 = full, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_native, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = full, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_native, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = full, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_native, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; // v1 = range std::cout << "* m = range, v1 = range, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_range, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = range, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_range, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = range, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_range, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; // v1 = slice std::cout << "* m = range, v1 = slice, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_slice, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = slice, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_slice, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = slice, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_range, vcl_v1_slice, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; ///////////////////////////// matrix_slice std::cout << "* m = slice, v1 = full, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_native, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = full, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_native, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = full, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_native, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; // v1 = range std::cout << "* m = slice, v1 = range, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_range, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = range, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_range, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = range, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_range, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; // v1 = slice std::cout << "* m = slice, v1 = slice, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_slice, vcl_v2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = slice, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_slice, vcl_v2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = slice, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, vcl_m1_slice, vcl_v1_slice, vcl_v2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; return retval; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Matrix" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = NumericT(1.0E-3); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; std::cout << " layout: row-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = NumericT(1.0E-3); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; std::cout << " layout: column-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-11; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << 
std::endl; std::cout << " layout: row-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef double NumericT; NumericT epsilon = 1.0E-11; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; std::cout << " layout: column-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/blas3_prod_float.cpp000644 001750 001750 00000004165 12267307531 021646 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include "blas3_prod_float_double.hpp" // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: BLAS 3 routines" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = NumericT(1.0E-3); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/sparse.cpp000644 001750 001750 00000121772 12267307531 017732 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #ifndef NDEBUG #define NDEBUG #endif // // *** System // #include // // *** Boost // #include #include #include #include #include #include #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL #define VIENNACL_WITH_UBLAS 1 #include "viennacl/scalar.hpp" #include "viennacl/compressed_matrix.hpp" #include "viennacl/compressed_compressed_matrix.hpp" #include "viennacl/coordinate_matrix.hpp" #include "viennacl/ell_matrix.hpp" #include "viennacl/hyb_matrix.hpp" #include "viennacl/vector.hpp" #include "viennacl/vector_proxy.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/ilu.hpp" #include "viennacl/linalg/detail/ilu/common.hpp" #include "viennacl/io/matrix_market.hpp" #include "examples/tutorial/Random.hpp" #include "examples/tutorial/vector-io.hpp" // // ------------------------------------------------------------- // using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType & s1, viennacl::scalar & s2) { if (s1 != s2) return (s1 - s2) / std::max(fabs(s1), std::fabs(s2)); return 0; } template ScalarType diff(ublas::vector & v1, viennacl::vector & v2) { ublas::vector v2_cpu(v2.size()); viennacl::backend::finish(); viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin()); for (unsigned int i=0;i 0 ) { //if (std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ) < 1e-10 ) //absolute tolerance (avoid round-off issues) // v2_cpu[i] = 0; //else v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); } else v2_cpu[i] = 0.0; if (v2_cpu[i] > 0.0001) { //std::cout << 
"Neighbor: " << i-1 << ": " << v1[i-1] << " vs. " << v2_cpu[i-1] << std::endl; std::cout << "Error at entry " << i << ": " << v1[i] << " vs. " << v2_cpu[i] << std::endl; //std::cout << "Neighbor: " << i+1 << ": " << v1[i+1] << " vs. " << v2_cpu[i+1] << std::endl; exit(EXIT_FAILURE); } } return norm_inf(v2_cpu); } template ScalarType diff(ublas::compressed_matrix & cpu_matrix, VCL_MATRIX & gpu_matrix) { typedef ublas::compressed_matrix CPU_MATRIX; CPU_MATRIX from_gpu(gpu_matrix.size1(), gpu_matrix.size2()); viennacl::backend::finish(); viennacl::copy(gpu_matrix, from_gpu); ScalarType error = 0; //step 1: compare all entries from cpu_matrix with gpu_matrix: //std::cout << "Ublas matrix: " << std::endl; for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) { //std::cout << "Row " << row_it.index1() << ": " << std::endl; for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) { //std::cout << "(" << col_it.index2() << ", " << *col_it << std::endl; ScalarType current_error = 0; if ( std::max( std::fabs(cpu_matrix(col_it.index1(), col_it.index2())), std::fabs(from_gpu(col_it.index1(), col_it.index2())) ) > 0 ) current_error = std::fabs(cpu_matrix(col_it.index1(), col_it.index2()) - from_gpu(col_it.index1(), col_it.index2())) / std::max( std::fabs(cpu_matrix(col_it.index1(), col_it.index2())), std::fabs(from_gpu(col_it.index1(), col_it.index2())) ); if (current_error > error) error = current_error; } } //step 2: compare all entries from gpu_matrix with cpu_matrix (sparsity pattern might differ): //std::cout << "ViennaCL matrix: " << std::endl; for (typename CPU_MATRIX::const_iterator1 row_it = from_gpu.begin1(); row_it != from_gpu.end1(); ++row_it) { //std::cout << "Row " << row_it.index1() << ": " << std::endl; for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) { //std::cout << "(" << col_it.index2() << ", " << *col_it << 
std::endl; ScalarType current_error = 0; if ( std::max( std::fabs(cpu_matrix(col_it.index1(), col_it.index2())), std::fabs(from_gpu(col_it.index1(), col_it.index2())) ) > 0 ) current_error = std::fabs(cpu_matrix(col_it.index1(), col_it.index2()) - from_gpu(col_it.index1(), col_it.index2())) / std::max( std::fabs(cpu_matrix(col_it.index1(), col_it.index2())), std::fabs(from_gpu(col_it.index1(), col_it.index2())) ); if (current_error > error) error = current_error; } } return error; } template int strided_matrix_vector_product_test(Epsilon epsilon, UblasVectorT & result, UblasVectorT const & rhs, VCLVectorT & vcl_result, VCLVectorT & vcl_rhs) { int retval = EXIT_SUCCESS; ublas::compressed_matrix ublas_matrix2(5, 4); ublas_matrix2(0, 0) = NumericT(2.0); ublas_matrix2(0, 2) = NumericT(-1.0); ublas_matrix2(1, 0) = NumericT(3.0); ublas_matrix2(1, 2) = NumericT(-5.0); ublas_matrix2(2, 1) = NumericT(5.0); ublas_matrix2(2, 2) = NumericT(-2.0); ublas_matrix2(3, 2) = NumericT(1.0); ublas_matrix2(3, 3) = NumericT(-6.0); ublas_matrix2(4, 1) = NumericT(7.0); ublas_matrix2(4, 2) = NumericT(-5.0); project(result, ublas::slice(1, 3, 5)) = ublas::prod(ublas_matrix2, project(rhs, ublas::slice(3, 2, 4))); VCL_MatrixT vcl_sparse_matrix2; viennacl::copy(ublas_matrix2, vcl_sparse_matrix2); viennacl::vector vec(4); vec(0) = rhs(3); vec(1) = rhs(5); vec(2) = rhs(7); vec(3) = rhs(9); viennacl::project(vcl_result, viennacl::slice(1, 3, 5)) = viennacl::linalg::prod(vcl_sparse_matrix2, viennacl::project(vcl_rhs, viennacl::slice(3, 2, 4))); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with stided vectors, part 1" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } vcl_result(1) = NumericT(1.0); vcl_result(4) = NumericT(1.0); vcl_result(7) = NumericT(1.0); vcl_result(10) = NumericT(1.0); vcl_result(13) = NumericT(1.0); viennacl::project(vcl_result, viennacl::slice(1, 
3, 5)) = viennacl::linalg::prod(vcl_sparse_matrix2, vec); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with strided vectors, part 2" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } return retval; } template< typename NumericT, typename VCL_MATRIX, typename Epsilon > int resize_test(Epsilon const& epsilon) { int retval = EXIT_SUCCESS; ublas::compressed_matrix ublas_matrix(5,5); VCL_MATRIX vcl_matrix; ublas_matrix(0,0) = NumericT(10.0); ublas_matrix(0, 1) = NumericT(0.1); ublas_matrix(0, 2) = NumericT(0.2); ublas_matrix(0, 3) = NumericT(0.3); ublas_matrix(0, 4) = NumericT(0.4); ublas_matrix(1,0) = NumericT(1.0); ublas_matrix(1, 1) = NumericT(1.1); ublas_matrix(1, 2) = NumericT(1.2); ublas_matrix(1, 3) = NumericT(1.3); ublas_matrix(1, 4) = NumericT(1.4); ublas_matrix(2,0) = NumericT(2.0); ublas_matrix(2, 1) = NumericT(2.1); ublas_matrix(2, 2) = NumericT(2.2); ublas_matrix(2, 3) = NumericT(2.3); ublas_matrix(2, 4) = NumericT(2.4); ublas_matrix(3,0) = NumericT(3.0); ublas_matrix(3, 1) = NumericT(3.1); ublas_matrix(3, 2) = NumericT(3.2); ublas_matrix(3, 3) = NumericT(3.3); ublas_matrix(3, 4) = NumericT(3.4); ublas_matrix(4,0) = NumericT(4.0); ublas_matrix(4, 1) = NumericT(4.1); ublas_matrix(4, 2) = NumericT(4.2); ublas_matrix(4, 3) = NumericT(4.3); ublas_matrix(4, 4) = NumericT(4.4); viennacl::copy(ublas_matrix, vcl_matrix); ublas::compressed_matrix other_matrix(ublas_matrix.size1(), ublas_matrix.size2()); viennacl::copy(vcl_matrix, other_matrix); std::cout << "Checking for equality after copy..." << std::endl; if( std::fabs(diff(ublas_matrix, vcl_matrix)) > epsilon ) { std::cout << "# Error at operation: equality after copy with sparse matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_matrix, vcl_matrix)) << std::endl; return EXIT_FAILURE; } std::cout << "Testing resize to larger..." 
<< std::endl; ublas_matrix.resize(10, 10, false); //ublas does not allow preserve = true here ublas_matrix(0,0) = NumericT(10.0); ublas_matrix(0, 1) = NumericT(0.1); ublas_matrix(0, 2) = NumericT(0.2); ublas_matrix(0, 3) = NumericT(0.3); ublas_matrix(0, 4) = NumericT(0.4); ublas_matrix(1,0) = NumericT( 1.0); ublas_matrix(1, 1) = NumericT(1.1); ublas_matrix(1, 2) = NumericT(1.2); ublas_matrix(1, 3) = NumericT(1.3); ublas_matrix(1, 4) = NumericT(1.4); ublas_matrix(2,0) = NumericT( 2.0); ublas_matrix(2, 1) = NumericT(2.1); ublas_matrix(2, 2) = NumericT(2.2); ublas_matrix(2, 3) = NumericT(2.3); ublas_matrix(2, 4) = NumericT(2.4); ublas_matrix(3,0) = NumericT( 3.0); ublas_matrix(3, 1) = NumericT(3.1); ublas_matrix(3, 2) = NumericT(3.2); ublas_matrix(3, 3) = NumericT(3.3); ublas_matrix(3, 4) = NumericT(3.4); ublas_matrix(4,0) = NumericT( 4.0); ublas_matrix(4, 1) = NumericT(4.1); ublas_matrix(4, 2) = NumericT(4.2); ublas_matrix(4, 3) = NumericT(4.3); ublas_matrix(4, 4) = NumericT(4.4); //std::cout << ublas_matrix << std::endl; vcl_matrix.resize(10, 10, true); if( std::fabs(diff(ublas_matrix, vcl_matrix)) > epsilon ) { std::cout << "# Error at operation: resize (to larger) with sparse matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_matrix, vcl_matrix)) << std::endl; return EXIT_FAILURE; } ublas_matrix(5,5) = NumericT(5.5); ublas_matrix(5, 6) = NumericT(5.6); ublas_matrix(5, 7) = NumericT(5.7); ublas_matrix(5, 8) = NumericT(5.8); ublas_matrix(5, 9) = NumericT(5.9); ublas_matrix(6,5) = NumericT(6.5); ublas_matrix(6, 6) = NumericT(6.6); ublas_matrix(6, 7) = NumericT(6.7); ublas_matrix(6, 8) = NumericT(6.8); ublas_matrix(6, 9) = NumericT(6.9); ublas_matrix(7,5) = NumericT(7.5); ublas_matrix(7, 6) = NumericT(7.6); ublas_matrix(7, 7) = NumericT(7.7); ublas_matrix(7, 8) = NumericT(7.8); ublas_matrix(7, 9) = NumericT(7.9); ublas_matrix(8,5) = NumericT(8.5); ublas_matrix(8, 6) = NumericT(8.6); ublas_matrix(8, 7) = NumericT(8.7); ublas_matrix(8, 8) = 
NumericT(8.8); ublas_matrix(8, 9) = NumericT(8.9); ublas_matrix(9,5) = NumericT(9.5); ublas_matrix(9, 6) = NumericT(9.6); ublas_matrix(9, 7) = NumericT(9.7); ublas_matrix(9, 8) = NumericT(9.8); ublas_matrix(9, 9) = NumericT(9.9); viennacl::copy(ublas_matrix, vcl_matrix); std::cout << "Testing resize to smaller..." << std::endl; ublas_matrix.resize(7, 7, false); //ublas does not allow preserve = true here ublas_matrix(0,0) = NumericT(10.0); ublas_matrix(0, 1) = NumericT(0.1); ublas_matrix(0, 2) = NumericT(0.2); ublas_matrix(0, 3) = NumericT(0.3); ublas_matrix(0, 4) = NumericT(0.4); ublas_matrix(1,0) = NumericT( 1.0); ublas_matrix(1, 1) = NumericT(1.1); ublas_matrix(1, 2) = NumericT(1.2); ublas_matrix(1, 3) = NumericT(1.3); ublas_matrix(1, 4) = NumericT(1.4); ublas_matrix(2,0) = NumericT( 2.0); ublas_matrix(2, 1) = NumericT(2.1); ublas_matrix(2, 2) = NumericT(2.2); ublas_matrix(2, 3) = NumericT(2.3); ublas_matrix(2, 4) = NumericT(2.4); ublas_matrix(3,0) = NumericT( 3.0); ublas_matrix(3, 1) = NumericT(3.1); ublas_matrix(3, 2) = NumericT(3.2); ublas_matrix(3, 3) = NumericT(3.3); ublas_matrix(3, 4) = NumericT(3.4); ublas_matrix(4,0) = NumericT( 4.0); ublas_matrix(4, 1) = NumericT(4.1); ublas_matrix(4, 2) = NumericT(4.2); ublas_matrix(4, 3) = NumericT(4.3); ublas_matrix(4, 4) = NumericT(4.4); ublas_matrix(5,5) = NumericT( 5.5); ublas_matrix(5, 6) = NumericT(5.6); ublas_matrix(5, 7) = NumericT(5.7); ublas_matrix(5, 8) = NumericT(5.8); ublas_matrix(5, 9) = NumericT(5.9); ublas_matrix(6,5) = NumericT( 6.5); ublas_matrix(6, 6) = NumericT(6.6); ublas_matrix(6, 7) = NumericT(6.7); ublas_matrix(6, 8) = NumericT(6.8); ublas_matrix(6, 9) = NumericT(6.9); vcl_matrix.resize(7, 7); //std::cout << ublas_matrix << std::endl; if( std::fabs(diff(ublas_matrix, vcl_matrix)) > epsilon ) { std::cout << "# Error at operation: resize (to smaller) with sparse matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_matrix, vcl_matrix)) << std::endl; retval = EXIT_FAILURE; } 
ublas::vector ublas_vec = ublas::scalar_vector(ublas_matrix.size1(), NumericT(3.1415)); viennacl::vector vcl_vec(ublas_matrix.size1()); std::cout << "Testing transposed unit lower triangular solve: compressed_matrix" << std::endl; viennacl::copy(ublas_vec, vcl_vec); std::cout << "matrix: " << ublas_matrix << std::endl; std::cout << "vector: " << ublas_vec << std::endl; std::cout << "ViennaCL matrix size: " << vcl_matrix.size1() << " x " << vcl_matrix.size2() << std::endl; std::cout << "ublas..." << std::endl; boost::numeric::ublas::inplace_solve((ublas_matrix), ublas_vec, boost::numeric::ublas::unit_lower_tag()); std::cout << "ViennaCL..." << std::endl; viennacl::linalg::inplace_solve((vcl_matrix), vcl_vec, viennacl::linalg::unit_lower_tag()); /* std::list< viennacl::backend::mem_handle > multifrontal_L_row_index_arrays_; std::list< viennacl::backend::mem_handle > multifrontal_L_row_buffers_; std::list< viennacl::backend::mem_handle > multifrontal_L_col_buffers_; std::list< viennacl::backend::mem_handle > multifrontal_L_element_buffers_; std::list< std::size_t > multifrontal_L_row_elimination_num_list_; viennacl::vector multifrontal_U_diagonal_; viennacl::linalg::detail::multifrontal_setup_L(vcl_matrix, multifrontal_U_diagonal_, //dummy multifrontal_L_row_index_arrays_, multifrontal_L_row_buffers_, multifrontal_L_col_buffers_, multifrontal_L_element_buffers_, multifrontal_L_row_elimination_num_list_); viennacl::linalg::detail::multifrontal_substitute(vcl_vec, multifrontal_L_row_index_arrays_, multifrontal_L_row_buffers_, multifrontal_L_col_buffers_, multifrontal_L_element_buffers_, multifrontal_L_row_elimination_num_list_); std::cout << "ublas..." << std::endl; boost::numeric::ublas::inplace_solve((ublas_matrix), ublas_vec, boost::numeric::ublas::upper_tag()); std::cout << "ViennaCL..." 
<< std::endl; std::list< viennacl::backend::mem_handle > multifrontal_U_row_index_arrays_; std::list< viennacl::backend::mem_handle > multifrontal_U_row_buffers_; std::list< viennacl::backend::mem_handle > multifrontal_U_col_buffers_; std::list< viennacl::backend::mem_handle > multifrontal_U_element_buffers_; std::list< std::size_t > multifrontal_U_row_elimination_num_list_; multifrontal_U_diagonal_.resize(vcl_matrix.size1(), false); viennacl::linalg::single_threaded::detail::row_info(vcl_matrix, multifrontal_U_diagonal_, viennacl::linalg::detail::SPARSE_ROW_DIAGONAL); viennacl::linalg::detail::multifrontal_setup_U(vcl_matrix, multifrontal_U_diagonal_, multifrontal_U_row_index_arrays_, multifrontal_U_row_buffers_, multifrontal_U_col_buffers_, multifrontal_U_element_buffers_, multifrontal_U_row_elimination_num_list_); vcl_vec = viennacl::linalg::element_div(vcl_vec, multifrontal_U_diagonal_); viennacl::linalg::detail::multifrontal_substitute(vcl_vec, multifrontal_U_row_index_arrays_, multifrontal_U_row_buffers_, multifrontal_U_col_buffers_, multifrontal_U_element_buffers_, multifrontal_U_row_elimination_num_list_); */ for (std::size_t i=0; i int test(Epsilon const& epsilon) { std::cout << "Testing resizing of compressed_matrix..." << std::endl; int retval = resize_test >(epsilon); if (retval != EXIT_SUCCESS) return retval; std::cout << "Testing resizing of coordinate_matrix..." 
<< std::endl; //if (retval != EXIT_FAILURE) // retval = resize_test >(epsilon); //else // return retval; // -------------------------------------------------------------------------- ublas::vector rhs; ublas::vector result; ublas::compressed_matrix ublas_matrix; if (viennacl::io::read_matrix_market_file(ublas_matrix, "../../examples/testdata/mat65k.mtx") == EXIT_FAILURE) { std::cout << "Error reading Matrix file" << std::endl; return EXIT_FAILURE; } //unsigned int cg_mat_size = cg_mat.size(); std::cout << "done reading matrix" << std::endl; rhs.resize(ublas_matrix.size2()); for (std::size_t i=0; i(); } // add some random numbers to the double-compressed matrix: ublas::compressed_matrix ublas_cc_matrix(ublas_matrix.size1(), ublas_matrix.size2()); ublas_cc_matrix(42,199) = NumericT(3.1415); ublas_cc_matrix(31, 69) = NumericT(2.71); ublas_cc_matrix(23, 32) = NumericT(6); ublas_cc_matrix(177,57) = NumericT(4); ublas_cc_matrix(21, 97) = NumericT(-4); ublas_cc_matrix(92, 25) = NumericT(2); ublas_cc_matrix(89, 62) = NumericT(11); ublas_cc_matrix(1, 7) = NumericT(8); ublas_cc_matrix(85, 41) = NumericT(13); ublas_cc_matrix(66, 28) = NumericT(8); ublas_cc_matrix(21, 74) = NumericT(-2); result = rhs; viennacl::vector vcl_rhs(rhs.size()); viennacl::vector vcl_result(result.size()); viennacl::vector vcl_result2(result.size()); viennacl::compressed_matrix vcl_compressed_matrix(rhs.size(), rhs.size()); viennacl::compressed_compressed_matrix vcl_compressed_compressed_matrix(rhs.size(), rhs.size()); viennacl::coordinate_matrix vcl_coordinate_matrix(rhs.size(), rhs.size()); viennacl::ell_matrix vcl_ell_matrix; viennacl::hyb_matrix vcl_hyb_matrix; viennacl::copy(rhs.begin(), rhs.end(), vcl_rhs.begin()); viennacl::copy(ublas_matrix, vcl_compressed_matrix); viennacl::copy(ublas_cc_matrix, vcl_compressed_compressed_matrix); viennacl::copy(ublas_matrix, vcl_coordinate_matrix); // -------------------------------------------------------------------------- std::cout << "Testing products: 
ublas" << std::endl; result = viennacl::linalg::prod(ublas_matrix, rhs); std::cout << "Testing products: compressed_matrix" << std::endl; vcl_result = viennacl::linalg::prod(vcl_compressed_matrix, vcl_rhs); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing products: compressed_matrix, strided vectors" << std::endl; retval = strided_matrix_vector_product_test >(epsilon, result, rhs, vcl_result, vcl_rhs); if (retval != EXIT_SUCCESS) return retval; // // Triangular solvers for A \ b: // ublas::compressed_matrix ublas_matrix_trans(ublas_matrix.size2(), ublas_matrix.size1(), ublas_matrix.nnz()); // = trans(ublas_matrix); //note: triangular solvers with uBLAS show atrocious performance, while transposed solvers are quite okay. To keep execution times short, we use a double-transpose-trick in the following. 
// fast transpose: for (typename ublas::compressed_matrix::iterator1 row_it = ublas_matrix.begin1(); row_it != ublas_matrix.end1(); ++row_it) { for (typename ublas::compressed_matrix::iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) { ublas_matrix_trans(col_it.index1(), col_it.index2()) = *col_it; } } std::cout << "Testing unit upper triangular solve: compressed_matrix" << std::endl; result = rhs; viennacl::copy(result, vcl_result); boost::numeric::ublas::inplace_solve(trans(ublas_matrix_trans), result, boost::numeric::ublas::unit_upper_tag()); viennacl::linalg::inplace_solve(vcl_compressed_matrix, vcl_result, viennacl::linalg::unit_upper_tag()); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: unit upper triangular solve with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing upper triangular solve: compressed_matrix" << std::endl; result = rhs; viennacl::copy(result, vcl_result); boost::numeric::ublas::inplace_solve(trans(ublas_matrix_trans), result, boost::numeric::ublas::upper_tag()); viennacl::linalg::inplace_solve(vcl_compressed_matrix, vcl_result, viennacl::linalg::upper_tag()); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: upper triangular solve with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing unit lower triangular solve: compressed_matrix" << std::endl; result = rhs; viennacl::copy(result, vcl_result); boost::numeric::ublas::inplace_solve(trans(ublas_matrix_trans), result, boost::numeric::ublas::unit_lower_tag()); viennacl::linalg::inplace_solve(vcl_compressed_matrix, vcl_result, viennacl::linalg::unit_lower_tag()); /*std::list< viennacl::backend::mem_handle > multifrontal_L_row_index_arrays_; std::list< viennacl::backend::mem_handle > 
multifrontal_L_row_buffers_; std::list< viennacl::backend::mem_handle > multifrontal_L_col_buffers_; std::list< viennacl::backend::mem_handle > multifrontal_L_element_buffers_; std::list< std::size_t > multifrontal_L_row_elimination_num_list_; viennacl::vector multifrontal_U_diagonal_; viennacl::switch_memory_domain(multifrontal_U_diagonal_, viennacl::MAIN_MEMORY); multifrontal_U_diagonal_.resize(vcl_compressed_matrix.size1(), false); viennacl::linalg::single_threaded::detail::row_info(vcl_compressed_matrix, multifrontal_U_diagonal_, viennacl::linalg::detail::SPARSE_ROW_DIAGONAL); viennacl::linalg::detail::multifrontal_setup_L(vcl_compressed_matrix, multifrontal_U_diagonal_, //dummy multifrontal_L_row_index_arrays_, multifrontal_L_row_buffers_, multifrontal_L_col_buffers_, multifrontal_L_element_buffers_, multifrontal_L_row_elimination_num_list_); viennacl::linalg::detail::multifrontal_substitute(vcl_result, multifrontal_L_row_index_arrays_, multifrontal_L_row_buffers_, multifrontal_L_col_buffers_, multifrontal_L_element_buffers_, multifrontal_L_row_elimination_num_list_);*/ if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: unit lower triangular solve with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing lower triangular solve: compressed_matrix" << std::endl; result = rhs; viennacl::copy(result, vcl_result); boost::numeric::ublas::inplace_solve(trans(ublas_matrix_trans), result, boost::numeric::ublas::lower_tag()); viennacl::linalg::inplace_solve(vcl_compressed_matrix, vcl_result, viennacl::linalg::lower_tag()); /*std::list< viennacl::backend::mem_handle > multifrontal_U_row_index_arrays_; std::list< viennacl::backend::mem_handle > multifrontal_U_row_buffers_; std::list< viennacl::backend::mem_handle > multifrontal_U_col_buffers_; std::list< viennacl::backend::mem_handle > multifrontal_U_element_buffers_; std::list< 
std::size_t > multifrontal_U_row_elimination_num_list_; multifrontal_U_diagonal_.resize(vcl_compressed_matrix.size1(), false); viennacl::linalg::single_threaded::detail::row_info(vcl_compressed_matrix, multifrontal_U_diagonal_, viennacl::linalg::detail::SPARSE_ROW_DIAGONAL); viennacl::linalg::detail::multifrontal_setup_U(vcl_compressed_matrix, multifrontal_U_diagonal_, multifrontal_U_row_index_arrays_, multifrontal_U_row_buffers_, multifrontal_U_col_buffers_, multifrontal_U_element_buffers_, multifrontal_U_row_elimination_num_list_); vcl_result = viennacl::linalg::element_div(vcl_result, multifrontal_U_diagonal_); viennacl::linalg::detail::multifrontal_substitute(vcl_result, multifrontal_U_row_index_arrays_, multifrontal_U_row_buffers_, multifrontal_U_col_buffers_, multifrontal_U_element_buffers_, multifrontal_U_row_elimination_num_list_);*/ if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: lower triangular solve with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } /* std::cout << "Testing lower triangular solve: compressed_matrix" << std::endl; result = rhs; viennacl::copy(result, vcl_result); boost::numeric::ublas::inplace_solve(ublas_matrix, result, boost::numeric::ublas::lower_tag()); viennacl::linalg::inplace_solve(vcl_compressed_matrix, vcl_result, viennacl::linalg::lower_tag()); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: lower triangular solve with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; }*/ // // Triangular solvers for A^T \ b // std::cout << "Testing transposed unit upper triangular solve: compressed_matrix" << std::endl; result = rhs; viennacl::copy(result, vcl_result); boost::numeric::ublas::inplace_solve(trans(ublas_matrix), result, boost::numeric::ublas::unit_upper_tag()); 
viennacl::linalg::inplace_solve(trans(vcl_compressed_matrix), vcl_result, viennacl::linalg::unit_upper_tag()); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: unit upper triangular solve with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing transposed upper triangular solve: compressed_matrix" << std::endl; result = rhs; viennacl::copy(result, vcl_result); boost::numeric::ublas::inplace_solve(trans(ublas_matrix), result, boost::numeric::ublas::upper_tag()); viennacl::linalg::inplace_solve(trans(vcl_compressed_matrix), vcl_result, viennacl::linalg::upper_tag()); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: upper triangular solve with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing transposed unit lower triangular solve: compressed_matrix" << std::endl; result = rhs; viennacl::copy(result, vcl_result); boost::numeric::ublas::inplace_solve(trans(ublas_matrix), result, boost::numeric::ublas::unit_lower_tag()); viennacl::linalg::inplace_solve(trans(vcl_compressed_matrix), vcl_result, viennacl::linalg::unit_lower_tag()); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: unit lower triangular solve with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing transposed lower triangular solve: compressed_matrix" << std::endl; result = rhs; viennacl::copy(result, vcl_result); boost::numeric::ublas::inplace_solve(trans(ublas_matrix), result, boost::numeric::ublas::lower_tag()); viennacl::linalg::inplace_solve(trans(vcl_compressed_matrix), vcl_result, viennacl::linalg::lower_tag()); if( std::fabs(diff(result, vcl_result)) > epsilon ) { 
std::cout << "# Error at operation: lower triangular solve with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing products: compressed_compressed_matrix" << std::endl; result = viennacl::linalg::prod(ublas_cc_matrix, rhs); vcl_result = viennacl::linalg::prod(vcl_compressed_compressed_matrix, vcl_rhs); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with compressed_compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } { ublas::compressed_matrix temp(vcl_compressed_compressed_matrix.size1(), vcl_compressed_compressed_matrix.size2()); viennacl::copy(vcl_compressed_compressed_matrix, temp); // check that entries are correct by computing the product again: result = viennacl::linalg::prod(temp, rhs); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with compressed_compressed_matrix (after copy back)" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } } std::cout << "Testing products: coordinate_matrix" << std::endl; result = viennacl::linalg::prod(ublas_matrix, rhs); vcl_result = viennacl::linalg::prod(vcl_coordinate_matrix, vcl_rhs); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with coordinate_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing products: coordinate_matrix, strided vectors" << std::endl; //std::cout << " --> SKIPPING <--" << std::endl; retval = strided_matrix_vector_product_test >(epsilon, result, rhs, vcl_result, vcl_rhs); if (retval != EXIT_SUCCESS) return retval; //std::cout << "Copying ell_matrix" << std::endl; 
viennacl::copy(ublas_matrix, vcl_ell_matrix); ublas_matrix.clear(); viennacl::copy(vcl_ell_matrix, ublas_matrix);// just to check that it's works std::cout << "Testing products: ell_matrix" << std::endl; result = viennacl::linalg::prod(ublas_matrix, rhs); vcl_result.clear(); vcl_result = viennacl::linalg::prod(vcl_ell_matrix, vcl_rhs); //viennacl::linalg::prod_impl(vcl_ell_matrix, vcl_rhs, vcl_result); //std::cout << vcl_result << "\n"; //std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; //std::cout << "First entry of result vector: " << vcl_result[0] << std::endl; if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with ell_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing products: ell_matrix, strided vectors" << std::endl; retval = strided_matrix_vector_product_test >(epsilon, result, rhs, vcl_result, vcl_rhs); if (retval != EXIT_SUCCESS) return retval; //std::cout << "Copying hyb_matrix" << std::endl; viennacl::copy(ublas_matrix, vcl_hyb_matrix); ublas_matrix.clear(); viennacl::copy(vcl_hyb_matrix, ublas_matrix);// just to check that it's works viennacl::copy(ublas_matrix, vcl_hyb_matrix); std::cout << "Testing products: hyb_matrix" << std::endl; result = viennacl::linalg::prod(ublas_matrix, rhs); vcl_result.clear(); vcl_result = viennacl::linalg::prod(vcl_hyb_matrix, vcl_rhs); //viennacl::linalg::prod_impl(vcl_hyb_matrix, vcl_rhs, vcl_result); //std::cout << vcl_result << "\n"; //std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; //std::cout << "First entry of result vector: " << vcl_result[0] << std::endl; if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with hyb_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout 
<< "Testing products: hyb_matrix, strided vectors" << std::endl; retval = strided_matrix_vector_product_test >(epsilon, result, rhs, vcl_result, vcl_rhs); if (retval != EXIT_SUCCESS) return retval; // -------------------------------------------------------------------------- // -------------------------------------------------------------------------- NumericT alpha = static_cast(2.786); NumericT beta = static_cast(1.432); copy(rhs.begin(), rhs.end(), vcl_rhs.begin()); copy(result.begin(), result.end(), vcl_result.begin()); copy(result.begin(), result.end(), vcl_result2.begin()); std::cout << "Testing scaled additions of products and vectors" << std::endl; result = alpha * viennacl::linalg::prod(ublas_matrix, rhs) + beta * result; vcl_result2 = alpha * viennacl::linalg::prod(vcl_compressed_matrix, vcl_rhs) + beta * vcl_result; if( std::fabs(diff(result, vcl_result2)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product (compressed_matrix) with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result2)) << std::endl; retval = EXIT_FAILURE; } vcl_result2.clear(); vcl_result2 = alpha * viennacl::linalg::prod(vcl_coordinate_matrix, vcl_rhs) + beta * vcl_result; if( std::fabs(diff(result, vcl_result2)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product (coordinate_matrix) with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result2)) << std::endl; retval = EXIT_FAILURE; } vcl_result2.clear(); vcl_result2 = alpha * viennacl::linalg::prod(vcl_ell_matrix, vcl_rhs) + beta * vcl_result; if( std::fabs(diff(result, vcl_result2)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product (ell_matrix) with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result2)) << std::endl; retval = EXIT_FAILURE; } vcl_result2.clear(); vcl_result2 = alpha * viennacl::linalg::prod(vcl_hyb_matrix, vcl_rhs) + beta * vcl_result; if( 
std::fabs(diff(result, vcl_result2)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product (hyb_matrix) with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result2)) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- return retval; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Sparse Matrices" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = static_cast(1E-4); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-12; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } #ifdef 
VIENNACL_WITH_OPENCL else std::cout << "No double precision support, skipping test..." << std::endl; #endif std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/vector_double.cu000644 001750 001750 00000004275 12267307531 021114 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include "vector_float_double.hpp" // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Vector" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-10; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; 
else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/scheduler_vector.cpp000644 001750 001750 00000061534 12267307531 021774 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // *** System // #include #include // // *** Boost // #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL #define VIENNACL_WITH_UBLAS 1 #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/vector_proxy.hpp" #include "viennacl/linalg/inner_prod.hpp" #include "viennacl/linalg/norm_1.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/norm_inf.hpp" #include "viennacl/scheduler/execute.hpp" #include "viennacl/scheduler/io.hpp" #include "Random.hpp" using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType const & s1, ScalarType const & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } // // ------------------------------------------------------------- // template ScalarType diff(ScalarType const & s1, viennacl::scalar const & s2) { viennacl::backend::finish(); if (s1 != s2) 
return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } // // ------------------------------------------------------------- // template ScalarType diff(ScalarType const & s1, viennacl::entry_proxy const & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } // // ------------------------------------------------------------- // template ScalarType diff(ublas::vector const & v1, ViennaCLVectorType const & vcl_vec) { ublas::vector v2_cpu(vcl_vec.size()); viennacl::backend::finish(); viennacl::copy(vcl_vec, v2_cpu); for (unsigned int i=0;i 0 ) v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); else v2_cpu[i] = 0.0; } return ublas::norm_inf(v2_cpu); } template int check(T1 const & t1, T2 const & t2, double epsilon) { int retval = EXIT_SUCCESS; double temp = std::fabs(diff(t1, t2)); if (temp > epsilon) { std::cout << "# Error! Relative difference: " << temp << std::endl; retval = EXIT_FAILURE; } else std::cout << "PASSED!" << std::endl; return retval; } // // ------------------------------------------------------------- // template< typename NumericT, typename Epsilon, typename UblasVectorType, typename ViennaCLVectorType1, typename ViennaCLVectorType2 > int test(Epsilon const& epsilon, UblasVectorType & ublas_v1, UblasVectorType & ublas_v2, ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2) { int retval = EXIT_SUCCESS; NumericT cpu_result = 42.0; viennacl::scalar gpu_result = 43.0; NumericT alpha = NumericT(3.1415); NumericT beta = NumericT(2.7172); // // Initializer: // std::cout << "Checking for zero_vector initializer..." << std::endl; ublas_v1 = ublas::zero_vector(ublas_v1.size()); vcl_v1 = viennacl::zero_vector(vcl_v1.size()); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Checking for scalar_vector initializer..." 
<< std::endl; ublas_v1 = ublas::scalar_vector(ublas_v1.size(), cpu_result); vcl_v1 = viennacl::scalar_vector(vcl_v1.size(), cpu_result); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 = ublas::scalar_vector(ublas_v1.size(), gpu_result); vcl_v1 = viennacl::scalar_vector(vcl_v1.size(), gpu_result); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Checking for unit_vector initializer..." << std::endl; ublas_v1 = ublas::unit_vector(ublas_v1.size(), 5); vcl_v1 = viennacl::unit_vector(vcl_v1.size(), 5); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; for (std::size_t i=0; i(); ublas_v2[i] = NumericT(1.0) + random(); } viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); //resync viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Checking for successful copy..." << std::endl; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_v2, vcl_v2, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing simple assignments..." 
<< std::endl; { ublas_v1 = ublas_v2; viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), vcl_v2); // same as vcl_v1 = vcl_v2; viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { ublas_v1 += ublas_v2; viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_inplace_add(), vcl_v2); // same as vcl_v1 += vcl_v2; viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { ublas_v1 -= ublas_v2; viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_inplace_sub(), vcl_v2); // same as vcl_v1 -= vcl_v2; viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "Testing composite assignments..." << std::endl; { ublas_v1 = ublas_v1 + ublas_v2; viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), vcl_v1 + vcl_v2); // same as vcl_v1 = vcl_v1 + vcl_v2; viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { ublas_v1 += alpha * ublas_v1 - beta * ublas_v2 + ublas_v1 / beta - ublas_v2 / alpha; viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_inplace_add(), alpha * vcl_v1 - beta * vcl_v2 + vcl_v1 / beta - vcl_v2 / alpha); // same as vcl_v1 += alpha * vcl_v1 - beta * vcl_v2 + beta * vcl_v1 - alpha * vcl_v2; viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { ublas_v1 = ublas_v1 - ublas_v2; viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), vcl_v1 - vcl_v2); // same as vcl_v1 = vcl_v1 - vcl_v2; viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "--- Testing reductions ---" << std::endl; std::cout << "inner_prod..." 
<< std::endl; { cpu_result = inner_prod(ublas_v1, ublas_v2); viennacl::scheduler::statement my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::inner_prod(vcl_v1, vcl_v2)); // same as gpu_result = inner_prod(vcl_v1, vcl_v2); viennacl::scheduler::execute(my_statement); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { cpu_result = inner_prod(ublas_v1 + ublas_v2, ublas_v2); viennacl::scheduler::statement my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::inner_prod(vcl_v1 + vcl_v2, vcl_v2)); // same as gpu_result = inner_prod(vcl_v1 + vcl_v2, vcl_v2); viennacl::scheduler::execute(my_statement); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { cpu_result = inner_prod(ublas_v1, ublas_v2 - ublas_v1); viennacl::scheduler::statement my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::inner_prod(vcl_v1, vcl_v2 - vcl_v1)); // same as gpu_result = inner_prod(vcl_v1, vcl_v2 - vcl_v1); viennacl::scheduler::execute(my_statement); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { cpu_result = inner_prod(ublas_v1 - ublas_v2, ublas_v2 + ublas_v1); viennacl::scheduler::statement my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::inner_prod(vcl_v1 - vcl_v2, vcl_v2 + vcl_v1)); // same as gpu_result = inner_prod(vcl_v1 - vcl_v2, vcl_v2 + vcl_v1); viennacl::scheduler::execute(my_statement); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "norm_1..." 
<< std::endl; { cpu_result = norm_1(ublas_v1); viennacl::scheduler::statement my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_1(vcl_v1)); // same as gpu_result = norm_1(vcl_v1); viennacl::scheduler::execute(my_statement); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { cpu_result = norm_1(ublas_v1 + ublas_v2); viennacl::scheduler::statement my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_1(vcl_v1 + vcl_v2)); // same as gpu_result = norm_1(vcl_v1 + vcl_v2); viennacl::scheduler::execute(my_statement); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "norm_2..." << std::endl; { cpu_result = norm_2(ublas_v1); viennacl::scheduler::statement my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_2(vcl_v1)); // same as gpu_result = norm_2(vcl_v1); viennacl::scheduler::execute(my_statement); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { cpu_result = norm_2(ublas_v1 + ublas_v2); viennacl::scheduler::statement my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_2(vcl_v1 + vcl_v2)); // same as gpu_result = norm_2(vcl_v1 + vcl_v2); viennacl::scheduler::execute(my_statement); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "norm_inf..." 
<< std::endl; { cpu_result = norm_inf(ublas_v1); viennacl::scheduler::statement my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_inf(vcl_v1)); // same as gpu_result = norm_inf(vcl_v1); viennacl::scheduler::execute(my_statement); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } { cpu_result = norm_inf(ublas_v1 - ublas_v2); viennacl::scheduler::statement my_statement(gpu_result, viennacl::op_assign(), viennacl::linalg::norm_inf(vcl_v1 - vcl_v2)); // same as gpu_result = norm_inf(vcl_v1 - vcl_v2); viennacl::scheduler::execute(my_statement); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "--- Testing elementwise operations (binary) ---" << std::endl; std::cout << "x = element_prod(x, y)... "; { ublas_v1 = element_prod(ublas_v1, ublas_v2); viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_prod(vcl_v1, vcl_v2)); viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_prod(x + y, y)... "; { ublas_v1 = element_prod(ublas_v1 + ublas_v2, ublas_v2); viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2)); viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_prod(x, x + y)... "; { ublas_v1 = element_prod(ublas_v1, ublas_v1 + ublas_v2); viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1)); viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_prod(x - y, y + x)... 
"; { ublas_v1 = element_prod(ublas_v1 - ublas_v2, ublas_v2 + ublas_v1); viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_prod(vcl_v1 - vcl_v2, vcl_v2 + vcl_v1)); viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_div(x, y)... "; { ublas_v1 = element_div(ublas_v1, ublas_v2); viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_div(vcl_v1, vcl_v2)); viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_div(x + y, y)... "; { ublas_v1 = element_div(ublas_v1 + ublas_v2, ublas_v2); viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2)); viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_div(x, x + y)... "; { ublas_v1 = element_div(ublas_v1, ublas_v1 + ublas_v2); viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1)); viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "x = element_div(x - y, y + x)... 
"; { ublas_v1 = element_div(ublas_v1 - ublas_v2, ublas_v2 + ublas_v1); viennacl::scheduler::statement my_statement(vcl_v1, viennacl::op_assign(), viennacl::linalg::element_div(vcl_v1 - vcl_v2, vcl_v2 + vcl_v1)); viennacl::scheduler::execute(my_statement); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << "--- Testing elementwise operations (unary) ---" << std::endl; #define GENERATE_UNARY_OP_TEST(OPNAME) \ ublas_v1 = ublas::scalar_vector(ublas_v1.size(), NumericT(0.21)); \ ublas_v2 = NumericT(3.1415) * ublas_v1; \ viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); \ viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); \ { \ for (std::size_t i=0; i int test(Epsilon const& epsilon) { int retval = EXIT_SUCCESS; std::size_t size = 24656; std::cout << "Running tests for vector of size " << size << std::endl; // // Set up UBLAS objects // ublas::vector ublas_full_vec(size); ublas::vector ublas_full_vec2(ublas_full_vec.size()); for (std::size_t i=0; i(); ublas_full_vec2[i] = NumericT(1.0) + random(); } ublas::range r1( ublas_full_vec.size() / 4, 2 * ublas_full_vec.size() / 4); ublas::range r2(2 * ublas_full_vec2.size() / 4, 3 * ublas_full_vec2.size() / 4); ublas::vector_range< ublas::vector > ublas_range_vec(ublas_full_vec, r1); ublas::vector_range< ublas::vector > ublas_range_vec2(ublas_full_vec2, r2); ublas::slice s1( ublas_full_vec.size() / 4, 3, ublas_full_vec.size() / 4); ublas::slice s2(2 * ublas_full_vec2.size() / 4, 2, ublas_full_vec2.size() / 4); ublas::vector_slice< ublas::vector > ublas_slice_vec(ublas_full_vec, s1); ublas::vector_slice< ublas::vector > ublas_slice_vec2(ublas_full_vec2, s2); // // Set up ViennaCL objects // viennacl::vector vcl_full_vec(ublas_full_vec.size()); viennacl::vector vcl_full_vec2(ublas_full_vec2.size()); viennacl::fast_copy(ublas_full_vec.begin(), ublas_full_vec.end(), vcl_full_vec.begin()); viennacl::copy(ublas_full_vec2.begin(), ublas_full_vec2.end(), 
vcl_full_vec2.begin()); viennacl::range vcl_r1( vcl_full_vec.size() / 4, 2 * vcl_full_vec.size() / 4); viennacl::range vcl_r2(2 * vcl_full_vec2.size() / 4, 3 * vcl_full_vec2.size() / 4); viennacl::vector_range< viennacl::vector > vcl_range_vec(vcl_full_vec, vcl_r1); viennacl::vector_range< viennacl::vector > vcl_range_vec2(vcl_full_vec2, vcl_r2); { viennacl::vector vcl_short_vec(vcl_range_vec); viennacl::vector vcl_short_vec2 = vcl_range_vec2; ublas::vector ublas_short_vec(ublas_range_vec); ublas::vector ublas_short_vec2(ublas_range_vec2); std::cout << "Testing creation of vectors from range..." << std::endl; if (check(ublas_short_vec, vcl_short_vec, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_short_vec2, vcl_short_vec2, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } viennacl::slice vcl_s1( vcl_full_vec.size() / 4, 3, vcl_full_vec.size() / 4); viennacl::slice vcl_s2(2 * vcl_full_vec2.size() / 4, 2, vcl_full_vec2.size() / 4); viennacl::vector_slice< viennacl::vector > vcl_slice_vec(vcl_full_vec, vcl_s1); viennacl::vector_slice< viennacl::vector > vcl_slice_vec2(vcl_full_vec2, vcl_s2); viennacl::vector vcl_short_vec(vcl_slice_vec); viennacl::vector vcl_short_vec2 = vcl_slice_vec2; ublas::vector ublas_short_vec(ublas_slice_vec); ublas::vector ublas_short_vec2(ublas_slice_vec2); std::cout << "Testing creation of vectors from slice..." 
<< std::endl; if (check(ublas_short_vec, vcl_short_vec, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_short_vec2, vcl_short_vec2, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // Now start running tests for vectors, ranges and slices: // std::cout << " ** vcl_v1 = vector, vcl_v2 = vector **" << std::endl; retval = test(epsilon, ublas_short_vec, ublas_short_vec2, vcl_short_vec, vcl_short_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = vector, vcl_v2 = range **" << std::endl; retval = test(epsilon, ublas_short_vec, ublas_short_vec2, vcl_short_vec, vcl_range_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = vector, vcl_v2 = slice **" << std::endl; retval = test(epsilon, ublas_short_vec, ublas_short_vec2, vcl_short_vec, vcl_slice_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " ** vcl_v1 = range, vcl_v2 = vector **" << std::endl; retval = test(epsilon, ublas_short_vec, ublas_short_vec2, vcl_range_vec, vcl_short_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = range, vcl_v2 = range **" << std::endl; retval = test(epsilon, ublas_short_vec, ublas_short_vec2, vcl_range_vec, vcl_range_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = range, vcl_v2 = slice **" << std::endl; retval = test(epsilon, ublas_short_vec, ublas_short_vec2, vcl_range_vec, vcl_slice_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " ** vcl_v1 = slice, vcl_v2 = vector **" << std::endl; retval = test(epsilon, ublas_short_vec, ublas_short_vec2, vcl_slice_vec, vcl_short_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = slice, vcl_v2 = range **" << std::endl; retval = test(epsilon, ublas_short_vec, ublas_short_vec2, vcl_slice_vec, vcl_range_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = slice, vcl_v2 = slice **" << std::endl; 
retval = test(epsilon, ublas_short_vec, ublas_short_vec2, vcl_slice_vec, vcl_slice_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; return EXIT_SUCCESS; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Vector" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = static_cast(1.0E-4); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-12; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/iterators.cpp000644 001750 001750 00000007045 12267307531 020445 
0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // *** System // #include #include // // *** ViennaCL // //#define VCL_BUILD_INFO //#define VIENNACL_WITH_UBLAS 1 #include "viennacl/matrix.hpp" #include "viennacl/vector.hpp" // // ------------------------------------------------------------- // template< typename NumericT > int test() { int retval = EXIT_SUCCESS; // -------------------------------------------------------------------------- typedef viennacl::vector VclVector; VclVector vcl_cont(3); vcl_cont[0] = 1; vcl_cont[1] = 2; vcl_cont[2] = 3; //typename VclVector::const_iterator const_iter_def_const; //typename VclVector::iterator iter_def_const; for(typename VclVector::const_iterator iter = vcl_cont.begin(); iter != vcl_cont.end(); iter++) { std::cout << *iter << std::endl; } for(typename VclVector::iterator iter = vcl_cont.begin(); iter != vcl_cont.end(); iter++) { std::cout << *iter << std::endl; } // -------------------------------------------------------------------------- return retval; } int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Iterators" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << 
std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: float" << std::endl; retval = test(); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: double" << std::endl; retval = test(); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/external_2.cpp000644 001750 001750 00000004631 12267307531 020472 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // A check for the absence of external linkage (otherwise, library is not truly 'header-only') // //#define VIENNACL_WITH_EIGEN #define VIENNACL_WITH_UBLAS // // *** System // #include // // *** ViennaCL // #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/compressed_matrix.hpp" #include "viennacl/coordinate_matrix.hpp" #include "viennacl/ell_matrix.hpp" #include "viennacl/hyb_matrix.hpp" #ifdef VIENNACL_WITH_OPENCL #include "viennacl/circulant_matrix.hpp" #include "viennacl/hankel_matrix.hpp" #include "viennacl/toeplitz_matrix.hpp" #include "viennacl/vandermonde_matrix.hpp" #endif #include "viennacl/linalg/ilu.hpp" #include "viennacl/linalg/row_scaling.hpp" #include "viennacl/linalg/jacobi_precond.hpp" #include "viennacl/linalg/cg.hpp" #include "viennacl/linalg/bicgstab.hpp" #include "viennacl/linalg/gmres.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/qr.hpp" #include "viennacl/misc/bandwidth_reduction.hpp" #ifdef VIENNACL_WITH_OPENCL #include "viennacl/linalg/amg.hpp" #include "viennacl/linalg/spai.hpp" #include "viennacl/linalg/svd.hpp" #include "viennacl/fft.hpp" #include "viennacl/generator/generate.hpp" #endif #include "viennacl/io/matrix_market.hpp" #include "viennacl/scheduler/execute.hpp" void other_func() { typedef float NumericType; //doing nothing but instantiating a few types viennacl::scalar s; viennacl::vector v(10); viennacl::matrix m(10, 10); viennacl::compressed_matrix compr(10, 10); viennacl::coordinate_matrix coord(10, 10); } ViennaCL-1.5.1-src/tests/src/matrix_row_float.cpp000644 001750 001750 00000003330 12267307531 022002 
0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include "matrix_float_double.hpp" int main (int, const char **) { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Matrix operations, row-major, single precision " << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; double epsilon = 1e-4; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; std::cout << " --- row-major ---" << std::endl; if (run_test(epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/matrix_col_double.cpp000644 001750 001750 00000003447 12267307531 022126 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include "matrix_float_double.hpp" int main (int, const char **) { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Matrix operations, column-major, double precision " << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { double epsilon = 1e-12; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; if (run_test(epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/structured-matrices.cpp000644 001750 001750 00000043273 12267307531 022445 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include #include #include #include #include //#define VIENNACL_BUILD_INFO //#define VIENNACL_DEBUG_ALL #include "viennacl/toeplitz_matrix.hpp" #include "viennacl/circulant_matrix.hpp" #include "viennacl/vandermonde_matrix.hpp" #include "viennacl/hankel_matrix.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/fft.hpp" // // A simple dense matrix class (in order to avoid an unnecessary boost dependency) // template class dense_matrix { public: typedef std::size_t size_type; dense_matrix(std::size_t rows, std::size_t cols) : elements_(rows * cols), rows_(rows), cols_(cols) {} T & operator()(std::size_t i, std::size_t j) { return elements_[i*cols_ + j]; } T const & operator()(std::size_t i, std::size_t j) const { return elements_[i*cols_ + j]; } std::size_t size1() const { return rows_; } std::size_t size2() const { return cols_; } dense_matrix & operator+=(dense_matrix const & other) { for(std::size_t i = 0; i < other.size1(); i++) for(std::size_t j = 0; j < other.size2(); j++) elements_[i*cols_ + j] = other.elements_[i*cols_+j]; return *this; } private: std::vector elements_; std::size_t rows_; std::size_t cols_; }; template std::ostream & operator<<(std::ostream & os, dense_matrix const & mat) { std::cout << "[" << mat.size1() << "," << mat.size2() << "]("; for (std::size_t i=0; i ScalarType diff(dense_matrix const & m1, dense_matrix const & m2) { ScalarType df = 0.0; ScalarType d1 = 0; ScalarType d2 = 0; for(std::size_t i = 0; i < m1.size1(); i++) for(std::size_t j = 0; j < m1.size2(); j++) { df += (m1(i,j) - m2(i,j)) * (m1(i,j) - m2(i,j)); d1 += m1(i,j) * m1(i,j); d2 += m2(i,j) * m2(i,j); } if ( (d1 == 0) && (d2 == 0) ) return 0; 
return std::sqrt(df / std::max(d1, d2)); } template ScalarType diff(std::vector& vec, std::vector& ref) { ScalarType df = 0.0; ScalarType norm_ref = 0; for(std::size_t i = 0; i < vec.size(); i++) { df = df + pow(vec[i] - ref[i], 2); norm_ref += ref[i] * ref[i]; } return std::sqrt(df / norm_ref) ; } template ScalarType diff_max(std::vector& vec, std::vector& ref) { ScalarType df = 0.0; ScalarType mx = 0.0; ScalarType norm_max = 0; for (std::size_t i = 0; i < vec.size(); i++) { df = std::max(fabs(vec[i] - ref[i]), df); mx = std::max(fabs(vec[i]), mx); if (mx > 0) { if (norm_max < df / mx) norm_max = df / mx; } } return norm_max; } template void transpose_test() { int w = 5, h = 7; std::vector s_normal(2 * w * h); viennacl::matrix normal(w, 2 * h); viennacl::matrix transp(h, 2 * w); for(unsigned int i = 0; i < s_normal.size(); i+=2) { s_normal[i] = i; s_normal[i+1] = i; } viennacl::fast_copy(&s_normal[0], &s_normal[0] + s_normal.size(), normal); std::cout << normal << std::endl; viennacl::detail::fft::transpose(normal); std::cout << normal << std::endl; } template int toeplitz_test(ScalarType epsilon) { std::size_t TOEPLITZ_SIZE = 47; viennacl::toeplitz_matrix vcl_toeplitz1(TOEPLITZ_SIZE, TOEPLITZ_SIZE); viennacl::toeplitz_matrix vcl_toeplitz2(TOEPLITZ_SIZE, TOEPLITZ_SIZE); viennacl::vector vcl_input(TOEPLITZ_SIZE); viennacl::vector vcl_result(TOEPLITZ_SIZE); std::vector input_ref(TOEPLITZ_SIZE); std::vector result_ref(TOEPLITZ_SIZE); dense_matrix m1(TOEPLITZ_SIZE, TOEPLITZ_SIZE); dense_matrix m2(TOEPLITZ_SIZE, TOEPLITZ_SIZE); for(std::size_t i = 0; i < TOEPLITZ_SIZE; i++) for(std::size_t j = 0; j < TOEPLITZ_SIZE; j++) { m1(i,j) = static_cast(i) - static_cast(j); m2(i,j) = m1(i,j) * m1(i,j) + ScalarType(1); } for(std::size_t i = 0; i < TOEPLITZ_SIZE; i++) input_ref[i] = ScalarType(i); // Copy to ViennaCL viennacl::copy(m1, vcl_toeplitz1); viennacl::copy(m2, vcl_toeplitz2); viennacl::copy(input_ref, vcl_input); // // Matrix-Vector product: // vcl_result = 
viennacl::linalg::prod(vcl_toeplitz1, vcl_input); for(std::size_t i = 0; i < m1.size1(); i++) //reference calculation { ScalarType entry = 0; for(std::size_t j = 0; j < m1.size2(); j++) entry += m1(i,j) * input_ref[j]; result_ref[i] = entry; } viennacl::copy(vcl_result, input_ref); std::cout << "Matrix-Vector Product: " << diff_max(input_ref, result_ref); if (diff_max(input_ref, result_ref) < epsilon) std::cout << " [OK]" << std::endl; else { for (std::size_t i=0; i int circulant_test(ScalarType epsilon) { std::size_t CIRCULANT_SIZE = 53; viennacl::circulant_matrix vcl_circulant1(CIRCULANT_SIZE, CIRCULANT_SIZE); viennacl::circulant_matrix vcl_circulant2(CIRCULANT_SIZE, CIRCULANT_SIZE); viennacl::vector vcl_input(CIRCULANT_SIZE); viennacl::vector vcl_result(CIRCULANT_SIZE); std::vector input_ref(CIRCULANT_SIZE); std::vector result_ref(CIRCULANT_SIZE); dense_matrix m1(vcl_circulant1.size1(), vcl_circulant1.size2()); dense_matrix m2(vcl_circulant1.size1(), vcl_circulant1.size2()); for(std::size_t i = 0; i < m1.size1(); i++) for(std::size_t j = 0; j < m1.size2(); j++) { m1(i,j) = static_cast((i - j + m1.size1()) % m1.size1()); m2(i,j) = m1(i,j) * m1(i,j) + ScalarType(1); } for(std::size_t i = 0; i < input_ref.size(); i++) input_ref[i] = ScalarType(i); // Copy to ViennaCL viennacl::copy(m1, vcl_circulant1); viennacl::copy(m2, vcl_circulant2); viennacl::copy(input_ref, vcl_input); // // Matrix-Vector product: // vcl_result = viennacl::linalg::prod(vcl_circulant1, vcl_input); for(std::size_t i = 0; i < m1.size1(); i++) //reference calculation { ScalarType entry = 0; for(std::size_t j = 0; j < m1.size2(); j++) entry += m1(i,j) * input_ref[j]; result_ref[i] = entry; } viennacl::copy(vcl_result, input_ref); std::cout << "Matrix-Vector Product: " << diff_max(input_ref, result_ref); if (diff_max(input_ref, result_ref) < epsilon) std::cout << " [OK]" << std::endl; else { for (std::size_t i=0; i int vandermonde_test(ScalarType epsilon) { std::size_t VANDERMONDE_SIZE = 61; 
viennacl::vandermonde_matrix vcl_vandermonde1(VANDERMONDE_SIZE, VANDERMONDE_SIZE); viennacl::vandermonde_matrix vcl_vandermonde2(VANDERMONDE_SIZE, VANDERMONDE_SIZE); viennacl::vector vcl_input(VANDERMONDE_SIZE); viennacl::vector vcl_result(VANDERMONDE_SIZE); std::vector input_ref(VANDERMONDE_SIZE); std::vector result_ref(VANDERMONDE_SIZE); dense_matrix m1(vcl_vandermonde1.size1(), vcl_vandermonde1.size2()); dense_matrix m2(m1.size1(), m1.size2()); for(std::size_t i = 0; i < m1.size1(); i++) for(std::size_t j = 0; j < m1.size2(); j++) { m1(i,j) = std::pow(ScalarType(1.0 + i/1000.0), ScalarType(j)); m2(i,j) = std::pow(ScalarType(1.0 - i/2000.0), ScalarType(j)); } for(std::size_t i = 0; i < input_ref.size(); i++) input_ref[i] = ScalarType(i); // Copy to ViennaCL viennacl::copy(m1, vcl_vandermonde1); viennacl::copy(m2, vcl_vandermonde2); viennacl::copy(input_ref, vcl_input); // // Matrix-Vector product: // vcl_result = viennacl::linalg::prod(vcl_vandermonde1, vcl_input); for(std::size_t i = 0; i < m1.size1(); i++) //reference calculation { ScalarType entry = 0; for(std::size_t j = 0; j < m1.size2(); j++) entry += m1(i,j) * input_ref[j]; result_ref[i] = entry; } viennacl::copy(vcl_result, input_ref); std::cout << "Matrix-Vector Product: " << diff_max(input_ref, result_ref); if (diff_max(input_ref, result_ref) < epsilon) std::cout << " [OK]" << std::endl; else { for (std::size_t i=0; i(1.0001); for(std::size_t j = 0; j < m1.size2(); j++) { m1(4, j) = std::pow(ScalarType(1.0001), ScalarType(j)); } viennacl::copy(vcl_vandermonde1, m2); std::cout << "Element manipulation: " << diff(m1, m2); if (diff(m1, m2) < epsilon) std::cout << " [OK]" << std::endl; else { std::cout << " [FAILED]" << std::endl; return EXIT_FAILURE; } return EXIT_SUCCESS; } template int hankel_test(ScalarType epsilon) { std::size_t HANKEL_SIZE = 7; viennacl::hankel_matrix vcl_hankel1(HANKEL_SIZE, HANKEL_SIZE); viennacl::hankel_matrix vcl_hankel2(HANKEL_SIZE, HANKEL_SIZE); viennacl::vector 
vcl_input(HANKEL_SIZE); viennacl::vector vcl_result(HANKEL_SIZE); std::vector input_ref(HANKEL_SIZE); std::vector result_ref(HANKEL_SIZE); dense_matrix m1(vcl_hankel1.size1(), vcl_hankel1.size2()); dense_matrix m2(m1.size1(), m1.size2()); for(std::size_t i = 0; i < m1.size1(); i++) for(std::size_t j = 0; j < m1.size2(); j++) { m1(i,j) = static_cast((i + j) % (2 * m1.size1())); m2(i,j) = m1(i,j) * m1(i,j) + ScalarType(1); } for(std::size_t i = 0; i < input_ref.size(); i++) input_ref[i] = ScalarType(i); // Copy to ViennaCL viennacl::copy(m1, vcl_hankel1); viennacl::copy(m2, vcl_hankel2); viennacl::copy(input_ref, vcl_input); // // Matrix-Vector product: // vcl_result = viennacl::linalg::prod(vcl_hankel1, vcl_input); for(std::size_t i = 0; i < m1.size1(); i++) //reference calculation { ScalarType entry = 0; for(std::size_t j = 0; j < m1.size2(); j++) entry += m1(i,j) * input_ref[j]; result_ref[i] = entry; } viennacl::copy(vcl_result, input_ref); std::cout << "Matrix-Vector Product: " << diff_max(input_ref, result_ref); if (diff_max(input_ref, result_ref) < epsilon) std::cout << " [OK]" << std::endl; else { for (std::size_t i=0; i(static_cast(eps)) == EXIT_FAILURE) return EXIT_FAILURE; std::cout << " -- Circulant matrix -- " << std::endl; if (circulant_test(static_cast(eps)) == EXIT_FAILURE) return EXIT_FAILURE; std::cout << " -- Toeplitz matrix -- " << std::endl; if (toeplitz_test(static_cast(eps)) == EXIT_FAILURE) return EXIT_FAILURE; std::cout << " -- Hankel matrix -- " << std::endl; if (hankel_test(static_cast(eps)) == EXIT_FAILURE) return EXIT_FAILURE; std::cout << std::endl; if( viennacl::ocl::current_device().double_support() ) { eps = 1e-10; std::cout << std::endl; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << eps << std::endl; std::cout << " numeric: double" << std::endl; std::cout << std::endl; std::cout << " -- Vandermonde matrix -- " << std::endl; if (vandermonde_test(eps) == EXIT_FAILURE) return EXIT_FAILURE; std::cout << " -- 
Circulant matrix -- " << std::endl; if (circulant_test(eps) == EXIT_FAILURE) return EXIT_FAILURE; std::cout << " -- Toeplitz matrix -- " << std::endl; if (toeplitz_test(eps) == EXIT_FAILURE) return EXIT_FAILURE; std::cout << " -- Hankel matrix -- " << std::endl; if (hankel_test(eps) == EXIT_FAILURE) return EXIT_FAILURE; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/matrix_col_float.cu000644 001750 001750 00000003342 12267307531 021600 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include "matrix_float_double.hpp" int main (int, const char **) { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Matrix operations, column-major, single precision " << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; double epsilon = 1e-4; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; std::cout << " --- column-major ---" << std::endl; if (run_test(epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << std::endl; std::cout << 
"------- Test completed --------" << std::endl; std::cout << std::endl; return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/blas3_solve_float.cpp000644 001750 001750 00000050035 12267307531 022027 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ //#define NDEBUG //#define VIENNACL_DEBUG_BUILD // // *** System // #include // // *** Boost // #include #include #include #include #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL //#define VIENNACL_DEBUG_BUILD #define VIENNACL_WITH_UBLAS 1 #include "viennacl/scalar.hpp" #include "viennacl/matrix.hpp" #include "viennacl/matrix_proxy.hpp" #include "viennacl/vector.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "examples/tutorial/Random.hpp" // // ------------------------------------------------------------- // using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType & s1, viennacl::scalar & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / std::max(fabs(s1), fabs(s2)); return 0; } template ScalarType diff(ublas::vector & v1, viennacl::vector & v2) { ublas::vector v2_cpu(v2.size()); viennacl::backend::finish(); viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin()); viennacl::backend::finish(); for (std::size_t i=0;i 0 ) v2_cpu[i] = fabs(v2_cpu[i] - v1[i]) / 
std::max( fabs(v2_cpu[i]), fabs(v1[i]) ); else v2_cpu[i] = 0.0; } return norm_inf(v2_cpu); } template ScalarType diff(ublas::matrix & mat1, VCLMatrixType & mat2) { ublas::matrix mat2_cpu(mat2.size1(), mat2.size2()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(mat2, mat2_cpu); ScalarType ret = 0; ScalarType act = 0; for (unsigned int i = 0; i < mat2_cpu.size1(); ++i) { for (unsigned int j = 0; j < mat2_cpu.size2(); ++j) { act = std::fabs(mat2_cpu(i,j) - mat1(i,j)) / std::max( std::fabs(mat2_cpu(i, j)), std::fabs(mat1(i,j)) ); if (act > ret) ret = act; } } //std::cout << ret << std::endl; return ret; } // // Triangular solvers // template void run_solver_check(RHSTypeRef & B_ref, RHSTypeCheck & B_check, int & retval, Epsilon const & epsilon) { double act_diff = fabs(diff(B_ref, B_check)); if( act_diff > epsilon ) { std::cout << " FAILED!" << std::endl; std::cout << "# Error at operation: matrix-matrix solve" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << " passed! 
" << act_diff << std::endl; } template< typename NumericT, typename Epsilon, typename ReferenceMatrixTypeA, typename ReferenceMatrixTypeB, typename ReferenceMatrixTypeC, typename MatrixTypeA, typename MatrixTypeB, typename MatrixTypeC, typename MatrixTypeResult> int test_solve(Epsilon const& epsilon, ReferenceMatrixTypeA const & A, ReferenceMatrixTypeB const & B_start, ReferenceMatrixTypeC const & C_start, MatrixTypeA const & vcl_A, MatrixTypeB & vcl_B, MatrixTypeC & vcl_C, MatrixTypeResult const & ) { int retval = EXIT_SUCCESS; // -------------------------------------------------------------------------- ReferenceMatrixTypeA result; ReferenceMatrixTypeC C_trans; ReferenceMatrixTypeB B = B_start; ReferenceMatrixTypeC C = C_start; MatrixTypeResult vcl_result; // Test: A \ B with various tags -------------------------------------------------------------------------- std::cout << "Testing A \\ B: " << std::endl; std::cout << " * upper_tag: "; result = ublas::solve(A, B, ublas::upper_tag()); vcl_result = viennacl::linalg::solve(vcl_A, vcl_B, viennacl::linalg::upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * unit_upper_tag: "; result = ublas::solve(A, B, ublas::unit_upper_tag()); vcl_result = viennacl::linalg::solve(vcl_A, vcl_B, viennacl::linalg::unit_upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * lower_tag: "; result = ublas::solve(A, B, ublas::lower_tag()); vcl_result = viennacl::linalg::solve(vcl_A, vcl_B, viennacl::linalg::lower_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * unit_lower_tag: "; result = ublas::solve(A, B, ublas::unit_lower_tag()); vcl_result = viennacl::linalg::solve(vcl_A, vcl_B, viennacl::linalg::unit_lower_tag()); run_solver_check(result, vcl_result, retval, epsilon); if (retval == EXIT_SUCCESS) std::cout << "Test A \\ B passed!" 
<< std::endl; B = B_start; C = C_start; // Test: A \ B^T -------------------------------------------------------------------------- std::cout << "Testing A \\ B^T: " << std::endl; std::cout << " * upper_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); //check solve(): result = ublas::solve(A, C_trans, ublas::upper_tag()); vcl_result = viennacl::linalg::solve(vcl_A, trans(vcl_C), viennacl::linalg::upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); //check compute kernels: std::cout << " * upper_tag: "; ublas::inplace_solve(A, C_trans, ublas::upper_tag()); viennacl::linalg::inplace_solve(vcl_A, trans(vcl_C), viennacl::linalg::upper_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * unit_upper_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(A, C_trans, ublas::unit_upper_tag()); viennacl::linalg::inplace_solve(vcl_A, trans(vcl_C), viennacl::linalg::unit_upper_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * lower_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(A, C_trans, ublas::lower_tag()); viennacl::linalg::inplace_solve(vcl_A, trans(vcl_C), viennacl::linalg::lower_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * unit_lower_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(A, C_trans, ublas::unit_lower_tag()); viennacl::linalg::inplace_solve(vcl_A, trans(vcl_C), viennacl::linalg::unit_lower_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); if (retval == EXIT_SUCCESS) std::cout << "Test A \\ B^T passed!" 
<< std::endl; B = B_start; C = C_start; // Test: A \ B with various tags -------------------------------------------------------------------------- std::cout << "Testing A^T \\ B: " << std::endl; std::cout << " * upper_tag: "; viennacl::copy(B, vcl_B); result = ublas::solve(trans(A), B, ublas::upper_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), vcl_B, viennacl::linalg::upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * unit_upper_tag: "; viennacl::copy(B, vcl_B); result = ublas::solve(trans(A), B, ublas::unit_upper_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), vcl_B, viennacl::linalg::unit_upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * lower_tag: "; viennacl::copy(B, vcl_B); result = ublas::solve(trans(A), B, ublas::lower_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), vcl_B, viennacl::linalg::lower_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * unit_lower_tag: "; viennacl::copy(B, vcl_B); result = ublas::solve(trans(A), B, ublas::unit_lower_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), vcl_B, viennacl::linalg::unit_lower_tag()); run_solver_check(result, vcl_result, retval, epsilon); if (retval == EXIT_SUCCESS) std::cout << "Test A^T \\ B passed!" 
<< std::endl; B = B_start; C = C_start; // Test: A^T \ B^T -------------------------------------------------------------------------- std::cout << "Testing A^T \\ B^T: " << std::endl; std::cout << " * upper_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); //check solve(): result = ublas::solve(trans(A), C_trans, ublas::upper_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); //check kernels: std::cout << " * upper_tag: "; ublas::inplace_solve(trans(A), C_trans, ublas::upper_tag()); viennacl::linalg::inplace_solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::upper_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * unit_upper_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(trans(A), C_trans, ublas::unit_upper_tag()); viennacl::linalg::inplace_solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::unit_upper_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * lower_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(trans(A), C_trans, ublas::lower_tag()); viennacl::linalg::inplace_solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::lower_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * unit_lower_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(trans(A), C_trans, ublas::unit_lower_tag()); viennacl::linalg::inplace_solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::unit_lower_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); if (retval == EXIT_SUCCESS) std::cout << "Test A^T \\ B^T passed!" 
<< std::endl; return retval; } template< typename NumericT, typename F_A, typename F_B, typename Epsilon > int test_solve(Epsilon const& epsilon) { int ret = EXIT_SUCCESS; long matrix_size = 135; //some odd number, not too large long rhs_num = 67; std::cout << "--- Part 2: Testing matrix-matrix solver ---" << std::endl; ublas::matrix A(matrix_size, matrix_size); ublas::matrix B_start(matrix_size, rhs_num); ublas::matrix C_start(rhs_num, matrix_size); for (std::size_t i = 0; i < A.size1(); ++i) { for (std::size_t j = 0; j < A.size2(); ++j) A(i,j) = static_cast(-0.5) * random(); A(i,i) = NumericT(1.0) + NumericT(2.0) * random(); //some extra weight on diagonal for stability } for (std::size_t i = 0; i < B_start.size1(); ++i) for (std::size_t j = 0; j < B_start.size2(); ++j) B_start(i,j) = random(); for (std::size_t i = 0; i < C_start.size1(); ++i) for (std::size_t j = 0; j < C_start.size2(); ++j) C_start(i,j) = random(); // A viennacl::range range1_A(matrix_size, 2*matrix_size); viennacl::range range2_A(2*matrix_size, 3*matrix_size); viennacl::slice slice1_A(matrix_size, 2, matrix_size); viennacl::slice slice2_A(0, 3, matrix_size); viennacl::matrix vcl_A(matrix_size, matrix_size); viennacl::copy(A, vcl_A); viennacl::matrix vcl_big_range_A(4*matrix_size, 4*matrix_size); viennacl::matrix_range > vcl_range_A(vcl_big_range_A, range1_A, range2_A); viennacl::copy(A, vcl_range_A); viennacl::matrix vcl_big_slice_A(4*matrix_size, 4*matrix_size); viennacl::matrix_slice > vcl_slice_A(vcl_big_slice_A, slice1_A, slice2_A); viennacl::copy(A, vcl_slice_A); // B viennacl::range range1_B(matrix_size, 2*matrix_size); viennacl::range range2_B(2*rhs_num, 3*rhs_num); viennacl::slice slice1_B(matrix_size, 2, matrix_size); viennacl::slice slice2_B(0, 3, rhs_num); viennacl::matrix vcl_B(matrix_size, rhs_num); viennacl::copy(B_start, vcl_B); viennacl::matrix vcl_big_range_B(4*matrix_size, 4*rhs_num); viennacl::matrix_range > vcl_range_B(vcl_big_range_B, range1_B, range2_B); 
viennacl::copy(B_start, vcl_range_B); viennacl::matrix vcl_big_slice_B(4*matrix_size, 4*rhs_num); viennacl::matrix_slice > vcl_slice_B(vcl_big_slice_B, slice1_B, slice2_B); viennacl::copy(B_start, vcl_slice_B); // C viennacl::range range1_C(rhs_num, 2*rhs_num); viennacl::range range2_C(2*matrix_size, 3*matrix_size); viennacl::slice slice1_C(rhs_num, 2, rhs_num); viennacl::slice slice2_C(0, 3, matrix_size); viennacl::matrix vcl_C(rhs_num, matrix_size); viennacl::copy(C_start, vcl_C); viennacl::matrix vcl_big_range_C(4*rhs_num, 4*matrix_size); viennacl::matrix_range > vcl_range_C(vcl_big_range_C, range1_C, range2_C); viennacl::copy(C_start, vcl_range_C); viennacl::matrix vcl_big_slice_C(4*rhs_num, 4*matrix_size); viennacl::matrix_slice > vcl_slice_C(vcl_big_slice_C, slice1_C, slice2_C); viennacl::copy(C_start, vcl_slice_C); std::cout << "Now using A=matrix, B=matrix" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_A, vcl_B, vcl_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=matrix, B=range" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_A, vcl_range_B, vcl_range_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=matrix, B=slice" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_A, vcl_slice_B, vcl_slice_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=range, B=matrix" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_range_A, vcl_B, vcl_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=range, B=range" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_range_A, vcl_range_B, vcl_range_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=range, B=slice" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_range_A, vcl_slice_B, vcl_slice_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=slice, B=matrix" << std::endl; ret = 
test_solve(epsilon, A, B_start, C_start, vcl_slice_A, vcl_B, vcl_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=slice, B=range" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_slice_A, vcl_range_B, vcl_range_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=slice, B=slice" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_slice_A, vcl_slice_B, vcl_slice_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; return ret; } // // Control functions // template< typename NumericT, typename Epsilon > int test(Epsilon const& epsilon) { int ret; std::cout << "////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=row ///" << std::endl; std::cout << "////////////////////////////////" << std::endl; ret = test_solve(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=col ///" << std::endl; std::cout << "////////////////////////////////" << std::endl; ret = test_solve(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=row ///" << std::endl; std::cout << "////////////////////////////////" << std::endl; ret = test_solve(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=col ///" << std::endl; std::cout << "////////////////////////////////" << std::endl; ret = test_solve(epsilon); if (ret != EXIT_SUCCESS) return ret; return ret; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: BLAS 3 routines" << std::endl; std::cout << 
"----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = NumericT(1.0E-3); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-11; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/matrix_int.hpp000644 001750 001750 00000104760 12267307531 020616 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #define VIENNACL_WITH_UBLAS //#define NDEBUG //#define VIENNACL_BUILD_INFO #include #include #include #include #include #include #include #include //#include "../benchmarks/benchmark-utils.hpp" #include "viennacl/scalar.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/prod.hpp" /*#include "viennacl/compressed_matrix.hpp" #include "viennacl/linalg/cg.hpp" #include "viennacl/linalg/inner_prod.hpp" #include "viennacl/linalg/ilu.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/io/matrix_market.hpp"*/ #include "viennacl/matrix_proxy.hpp" #include "viennacl/vector_proxy.hpp" #include "boost/numeric/ublas/vector.hpp" #include "boost/numeric/ublas/matrix.hpp" #include "boost/numeric/ublas/matrix_proxy.hpp" #include "boost/numeric/ublas/vector_proxy.hpp" #include "boost/numeric/ublas/io.hpp" using namespace boost::numeric; template bool check_for_equality(MatrixType const & ublas_A, VCLMatrixType const & vcl_A) { typedef typename MatrixType::value_type value_type; boost::numeric::ublas::matrix vcl_A_cpu(vcl_A.size1(), vcl_A.size2()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(vcl_A, vcl_A_cpu); for (std::size_t i=0; i int run_test(UBLASMatrixType & ublas_A, UBLASMatrixType & ublas_B, UBLASMatrixType & ublas_C, ViennaCLMatrixType1 & vcl_A, ViennaCLMatrixType2 & vcl_B, ViennaCLMatrixType3 vcl_C) { typedef typename viennacl::result_of::cpu_value_type::type cpu_value_type; cpu_value_type alpha = 3; viennacl::scalar gpu_alpha = alpha; cpu_value_type beta = 2; viennacl::scalar gpu_beta = beta; // // Initializer: // std::cout << "Checking for zero_matrix 
initializer..." << std::endl; ublas_A = ublas::zero_matrix(ublas_A.size1(), ublas_A.size2()); vcl_A = viennacl::zero_matrix(vcl_A.size1(), vcl_A.size2()); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; std::cout << "Checking for scalar_matrix initializer..." << std::endl; ublas_A = ublas::scalar_matrix(ublas_A.size1(), ublas_A.size2(), alpha); vcl_A = viennacl::scalar_matrix(vcl_A.size1(), vcl_A.size2(), alpha); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; ublas_A = ublas::scalar_matrix(ublas_A.size1(), ublas_A.size2(), gpu_beta); vcl_A = viennacl::scalar_matrix( vcl_A.size1(), vcl_A.size2(), gpu_beta); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; /* std::cout << "Checking for identity initializer..." << std::endl; ublas_A = ublas::identity_matrix(ublas_A.size1()); vcl_A = viennacl::identity_matrix(vcl_A.size1()); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; */ std::cout << std::endl; //std::cout << "//" << std::endl; //std::cout << "////////// Test: Assignments //////////" << std::endl; //std::cout << "//" << std::endl; if (!check_for_equality(ublas_B, vcl_B)) return EXIT_FAILURE; std::cout << "Testing matrix assignment... "; //std::cout << ublas_B(0,0) << " vs. " << vcl_B(0,0) << std::endl; ublas_A = ublas_B; vcl_A = vcl_B; if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; //std::cout << std::endl; //std::cout << "//" << std::endl; //std::cout << "////////// Test 1: Copy to GPU //////////" << std::endl; //std::cout << "//" << std::endl; ublas_A = ublas_B; viennacl::copy(ublas_B, vcl_A); std::cout << "Testing upper left copy to GPU... "; if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; ublas_C = ublas_B; viennacl::copy(ublas_B, vcl_C); std::cout << "Testing lower right copy to GPU... 
"; if (!check_for_equality(ublas_C, vcl_C)) return EXIT_FAILURE; //std::cout << std::endl; //std::cout << "//" << std::endl; //std::cout << "////////// Test 2: Copy from GPU //////////" << std::endl; //std::cout << "//" << std::endl; std::cout << "Testing upper left copy to A... "; if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; std::cout << "Testing lower right copy to C... "; if (!check_for_equality(ublas_C, vcl_C)) return EXIT_FAILURE; //std::cout << "//" << std::endl; //std::cout << "////////// Test 3: Addition //////////" << std::endl; //std::cout << "//" << std::endl; viennacl::copy(ublas_C, vcl_C); std::cout << "Inplace add: "; ublas_C += ublas_C; vcl_C += vcl_C; if (!check_for_equality(ublas_C, vcl_C)) return EXIT_FAILURE; std::cout << "Scaled inplace add: "; ublas_C += beta * ublas_A; vcl_C += gpu_beta * vcl_A; if (!check_for_equality(ublas_C, vcl_C)) return EXIT_FAILURE; std::cout << "Add: "; ublas_C = ublas_A + ublas_B; vcl_C = vcl_A + vcl_B; if (!check_for_equality(ublas_C, vcl_C)) return EXIT_FAILURE; std::cout << "Add with flipsign: "; ublas_C = - ublas_A + ublas_B; vcl_C = - vcl_A + vcl_B; if (!check_for_equality(ublas_C, vcl_C)) return EXIT_FAILURE; std::cout << "Scaled add (left): "; ublas_C = alpha * ublas_A + ublas_B; vcl_C = alpha * vcl_A + vcl_B; if (!check_for_equality(ublas_C, vcl_C)) return EXIT_FAILURE; std::cout << "Scaled add (left): "; vcl_C = gpu_alpha * vcl_A + vcl_B; if (!check_for_equality(ublas_C, vcl_C)) return EXIT_FAILURE; std::cout << "Scaled add (right): "; ublas_C = ublas_A + beta * ublas_B; vcl_C = vcl_A + beta * vcl_B; if (!check_for_equality(ublas_C, vcl_C)) return EXIT_FAILURE; std::cout << "Scaled add (right): "; vcl_C = vcl_A + gpu_beta * vcl_B; if (!check_for_equality(ublas_C, vcl_C)) return EXIT_FAILURE; std::cout << "Scaled add (both): "; ublas_C = alpha * ublas_A + beta * ublas_B; vcl_C = alpha * vcl_A + beta * vcl_B; if (!check_for_equality(ublas_C, vcl_C)) return EXIT_FAILURE; std::cout << "Scaled add 
(both): "; vcl_C = gpu_alpha * vcl_A + gpu_beta * vcl_B; if (!check_for_equality(ublas_C, vcl_C)) return EXIT_FAILURE; //std::cout << "//" << std::endl; //std::cout << "////////// Test 4: Subtraction //////////" << std::endl; //std::cout << "//" << std::endl; viennacl::copy(ublas_C, vcl_C); std::cout << "Inplace sub: "; ublas_C -= ublas_B; vcl_C -= vcl_B; if (!check_for_equality(ublas_C, vcl_C)) return EXIT_FAILURE; std::cout << "Scaled Inplace sub: "; ublas_C -= alpha * ublas_B; vcl_C -= alpha * vcl_B; if (!check_for_equality(ublas_C, vcl_C)) return EXIT_FAILURE; std::cout << "Sub: "; ublas_C = ublas_A - ublas_B; vcl_C = vcl_A - vcl_B; if (!check_for_equality(ublas_C, vcl_C)) return EXIT_FAILURE; std::cout << "Scaled sub (left): "; ublas_B = alpha * ublas_A - ublas_C; vcl_B = alpha * vcl_A - vcl_C; if (!check_for_equality(ublas_B, vcl_B)) return EXIT_FAILURE; std::cout << "Scaled sub (left): "; vcl_B = gpu_alpha * vcl_A - vcl_C; if (!check_for_equality(ublas_B, vcl_B)) return EXIT_FAILURE; std::cout << "Scaled sub (right): "; ublas_B = ublas_A - beta * ublas_C; vcl_B = vcl_A - vcl_C * beta; if (!check_for_equality(ublas_B, vcl_B)) return EXIT_FAILURE; std::cout << "Scaled sub (right): "; vcl_B = vcl_A - vcl_C * gpu_beta; if (!check_for_equality(ublas_B, vcl_B)) return EXIT_FAILURE; std::cout << "Scaled sub (both): "; ublas_B = alpha * ublas_A - beta * ublas_C; vcl_B = alpha * vcl_A - vcl_C * beta; if (!check_for_equality(ublas_B, vcl_B)) return EXIT_FAILURE; std::cout << "Scaled sub (both): "; vcl_B = gpu_alpha * vcl_A - vcl_C * gpu_beta; if (!check_for_equality(ublas_B, vcl_B)) return EXIT_FAILURE; std::cout << "Unary operator-: "; ublas_C = - ublas_A; vcl_C = - vcl_A; if (!check_for_equality(ublas_C, vcl_C)) return EXIT_FAILURE; //std::cout << "//" << std::endl; //std::cout << "////////// Test 5: Scaling //////////" << std::endl; //std::cout << "//" << std::endl; viennacl::copy(ublas_A, vcl_A); std::cout << "Multiplication with CPU scalar: "; ublas_A *= alpha; 
vcl_A *= alpha; if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; std::cout << "Multiplication with GPU scalar: "; ublas_A *= beta; vcl_A *= gpu_beta; if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; std::cout << "Division with CPU scalar: "; ublas_A /= alpha; vcl_A /= alpha; if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; std::cout << "Division with GPU scalar: "; ublas_A /= beta; vcl_A /= gpu_beta; if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; std::cout << "Testing elementwise multiplication..." << std::endl; ublas_B = ublas::scalar_matrix(ublas_B.size1(), ublas_B.size2(), 2); ublas_A = 3 * ublas_B; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_B, vcl_B); ublas_A = ublas::element_prod(ublas_A, ublas_B); vcl_A = viennacl::linalg::element_prod(vcl_A, vcl_B); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; ublas_A += ublas::element_prod(ublas_A, ublas_B); vcl_A += viennacl::linalg::element_prod(vcl_A, vcl_B); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; ublas_A -= ublas::element_prod(ublas_A, ublas_B); vcl_A -= viennacl::linalg::element_prod(vcl_A, vcl_B); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; /////// ublas_A = ublas::element_prod(ublas_A + ublas_B, ublas_B); vcl_A = viennacl::linalg::element_prod(vcl_A + vcl_B, vcl_B); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; ublas_A += ublas::element_prod(ublas_A + ublas_B, ublas_B); vcl_A += viennacl::linalg::element_prod(vcl_A + vcl_B, vcl_B); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; ublas_A -= ublas::element_prod(ublas_A + ublas_B, ublas_B); vcl_A -= viennacl::linalg::element_prod(vcl_A + vcl_B, vcl_B); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; /////// ublas_A = ublas::element_prod(ublas_A, ublas_B + ublas_A); vcl_A = viennacl::linalg::element_prod(vcl_A, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A)) return 
EXIT_FAILURE; ublas_A += ublas::element_prod(ublas_A, ublas_B + ublas_A); vcl_A += viennacl::linalg::element_prod(vcl_A, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; ublas_A -= ublas::element_prod(ublas_A, ublas_B + ublas_A); vcl_A -= viennacl::linalg::element_prod(vcl_A, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; /////// ublas_A = ublas::element_prod(ublas_A + ublas_B, ublas_B + ublas_A); vcl_A = viennacl::linalg::element_prod(vcl_A + vcl_B, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; ublas_A += ublas::element_prod(ublas_A + ublas_B, ublas_B + ublas_A); vcl_A += viennacl::linalg::element_prod(vcl_A + vcl_B, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; ublas_A -= ublas::element_prod(ublas_A + ublas_B, ublas_B + ublas_A); vcl_A -= viennacl::linalg::element_prod(vcl_A + vcl_B, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; ublas_B = ublas::scalar_matrix(ublas_B.size1(), ublas_B.size2(), 2); ublas_A = 3 * ublas_B; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_B, vcl_B); ublas_A = ublas::element_div(ublas_A, ublas_B); vcl_A = viennacl::linalg::element_div(vcl_A, vcl_B); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; ublas_A += ublas::element_div(ublas_A, ublas_B); vcl_A += viennacl::linalg::element_div(vcl_A, vcl_B); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; ublas_A -= ublas::element_div(ublas_A, ublas_B); vcl_A -= viennacl::linalg::element_div(vcl_A, vcl_B); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; /////// ublas_A = ublas::element_div(ublas_A + ublas_B, ublas_B); vcl_A = viennacl::linalg::element_div(vcl_A + vcl_B, vcl_B); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; ublas_A += ublas::element_div(ublas_A + ublas_B, ublas_B); vcl_A += viennacl::linalg::element_div(vcl_A + vcl_B, vcl_B); if 
(!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; ublas_A -= ublas::element_div(ublas_A + ublas_B, ublas_B); vcl_A -= viennacl::linalg::element_div(vcl_A + vcl_B, vcl_B); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; /////// ublas_A = ublas::element_div(ublas_A, ublas_B + ublas_A); vcl_A = viennacl::linalg::element_div(vcl_A, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; ublas_A += ublas::element_div(ublas_A, ublas_B + ublas_A); vcl_A += viennacl::linalg::element_div(vcl_A, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; ublas_A -= ublas::element_div(ublas_A, ublas_B + ublas_A); vcl_A -= viennacl::linalg::element_div(vcl_A, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; /////// ublas_A = ublas::element_div(ublas_A + ublas_B, ublas_B + ublas_A); vcl_A = viennacl::linalg::element_div(vcl_A + vcl_B, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; ublas_A += ublas::element_div(ublas_A + ublas_B, ublas_B + ublas_A); vcl_A += viennacl::linalg::element_div(vcl_A + vcl_B, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; ublas_A -= ublas::element_div(ublas_A + ublas_B, ublas_B + ublas_A); vcl_A -= viennacl::linalg::element_div(vcl_A + vcl_B, vcl_B + vcl_A); if (!check_for_equality(ublas_A, vcl_A)) return EXIT_FAILURE; std::cout << "Testing unary elementwise operations..." 
<< std::endl; #define GENERATE_UNARY_OP_TEST(FUNCNAME) \ ublas_B = ublas::scalar_matrix(ublas_B.size1(), ublas_B.size2(), 1); \ ublas_A = 3 * ublas_B; \ ublas_C = 2 * ublas_A; \ viennacl::copy(ublas_A, vcl_A); \ viennacl::copy(ublas_B, vcl_B); \ viennacl::copy(ublas_C, vcl_C); \ viennacl::copy(ublas_B, vcl_B); \ \ for (std::size_t i=0; i int run_test() { //typedef float ScalarType; typedef boost::numeric::ublas::matrix MatrixType; typedef viennacl::matrix VCLMatrixType; std::size_t dim_rows = 131; std::size_t dim_cols = 33; //std::size_t dim_rows = 5; //std::size_t dim_cols = 3; //setup ublas objects: MatrixType ublas_A(dim_rows, dim_cols); MatrixType ublas_B(dim_rows, dim_cols); MatrixType ublas_C(dim_rows, dim_cols); for (std::size_t i=0; i vcl_range_A(vcl_A_full, vcl_A_r1, vcl_A_r2); viennacl::slice vcl_A_s1(2, 3, dim_rows); viennacl::slice vcl_A_s2(2 * dim_cols, 2, dim_cols); viennacl::matrix_slice vcl_slice_A(vcl_A_full, vcl_A_s1, vcl_A_s2); // // Create B // VCLMatrixType vcl_B(dim_rows, dim_cols); viennacl::range vcl_B_r1(dim_rows, 2 * dim_rows); viennacl::range vcl_B_r2(2 * dim_cols, 3 * dim_cols); viennacl::matrix_range vcl_range_B(vcl_B_full, vcl_B_r1, vcl_B_r2); viennacl::slice vcl_B_s1(2 * dim_rows, 2, dim_rows); viennacl::slice vcl_B_s2(dim_cols, 3, dim_cols); viennacl::matrix_slice vcl_slice_B(vcl_B_full, vcl_B_s1, vcl_B_s2); // // Create C // VCLMatrixType vcl_C(dim_rows, dim_cols); viennacl::range vcl_C_r1(2 * dim_rows, 3 * dim_rows); viennacl::range vcl_C_r2(3 * dim_cols, 4 * dim_cols); viennacl::matrix_range vcl_range_C(vcl_C_full, vcl_C_r1, vcl_C_r2); viennacl::slice vcl_C_s1(dim_rows, 2, dim_rows); viennacl::slice vcl_C_s2(0, 3, dim_cols); viennacl::matrix_slice vcl_slice_C(vcl_C_full, vcl_C_s1, vcl_C_s2); viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, 
vcl_C); viennacl::copy(ublas_C, vcl_range_C); viennacl::copy(ublas_C, vcl_slice_C); std::cout << std::endl; std::cout << "//" << std::endl; std::cout << "////////// Test: Copy CTOR //////////" << std::endl; std::cout << "//" << std::endl; { std::cout << "Testing matrix created from range... "; VCLMatrixType vcl_temp = vcl_range_A; if (check_for_equality(ublas_A, vcl_temp)) std::cout << "PASSED!" << std::endl; else { std::cout << "ublas_A: " << ublas_A << std::endl; std::cout << "vcl_temp: " << vcl_temp << std::endl; std::cout << "vcl_range_A: " << vcl_range_A << std::endl; std::cout << "vcl_A: " << vcl_A << std::endl; std::cout << std::endl << "TEST failed!" << std::endl; return EXIT_FAILURE; } std::cout << "Testing matrix created from slice... "; VCLMatrixType vcl_temp2 = vcl_range_B; if (check_for_equality(ublas_B, vcl_temp2)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!" << std::endl; return EXIT_FAILURE; } } std::cout << "//" << std::endl; std::cout << "////////// Test: Initializer for matrix type //////////" << std::endl; std::cout << "//" << std::endl; { ublas::matrix ublas_dummy1 = ublas::identity_matrix(ublas_A.size1()); ublas::matrix ublas_dummy2 = ublas::scalar_matrix(ublas_A.size1(), ublas_A.size1(), 3); ublas::matrix ublas_dummy3 = ublas::zero_matrix(ublas_A.size1(), ublas_A.size1()); viennacl::matrix vcl_dummy1 = viennacl::identity_matrix(ublas_A.size1()); viennacl::matrix vcl_dummy2 = viennacl::scalar_matrix(ublas_A.size1(), ublas_A.size1(), 3); viennacl::matrix vcl_dummy3 = viennacl::zero_matrix(ublas_A.size1(), ublas_A.size1()); std::cout << "Testing initializer CTOR... "; if ( check_for_equality(ublas_dummy1, vcl_dummy1) && check_for_equality(ublas_dummy2, vcl_dummy2) && check_for_equality(ublas_dummy3, vcl_dummy3) ) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!" 
<< std::endl; return EXIT_FAILURE; } ublas_dummy1 = ublas::zero_matrix(ublas_A.size1(), ublas_A.size1()); ublas_dummy2 = ublas::identity_matrix(ublas_A.size1()); ublas_dummy3 = ublas::scalar_matrix(ublas_A.size1(), ublas_A.size1(), 3); vcl_dummy1 = viennacl::zero_matrix(ublas_A.size1(), ublas_A.size1()); vcl_dummy2 = viennacl::identity_matrix(ublas_A.size1()); vcl_dummy3 = viennacl::scalar_matrix(ublas_A.size1(), ublas_A.size1(), 3); std::cout << "Testing initializer assignment... "; if ( check_for_equality(ublas_dummy1, vcl_dummy1) && check_for_equality(ublas_dummy2, vcl_dummy2) && check_for_equality(ublas_dummy3, vcl_dummy3) ) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!" << std::endl; return EXIT_FAILURE; } } // // run operation tests: // /////// A=matrix: std::cout << "Testing A=matrix, B=matrix, C=matrix ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_A, vcl_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=matrix, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_A, vcl_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=matrix, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_A, vcl_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=range, C=matrix ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_A, vcl_range_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=range, C=range ..." 
<< std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_A, vcl_range_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=range, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_A, vcl_range_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=slice, C=matrix ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_A, vcl_slice_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=slice, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_A, vcl_slice_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=matrix, B=slice, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_A, vcl_slice_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } /////// A=range: std::cout << "Testing A=range, B=matrix, C=matrix ..." << std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=range, B=matrix, C=range ..." 
<< std::endl;  // finishes the "A=range, B=matrix, C=range" banner started on the previous line

  // Pattern used for every combination below: copy the host reference matrices
  // into the ViennaCL operands chosen for this combination, then run the
  // six-argument run_test() on them and stop at the first failure.

  // A=range, B=matrix, C=range
  viennacl::copy(ublas_A, vcl_range_A);
  viennacl::copy(ublas_B, vcl_B);
  viennacl::copy(ublas_C, vcl_range_C);
  if (run_test(ublas_A, ublas_B, ublas_C,
               vcl_range_A, vcl_B, vcl_range_C) != EXIT_SUCCESS)
  {
    return EXIT_FAILURE;
  }

  // A=range, B=matrix, C=slice
  std::cout << "Testing A=range, B=matrix, C=slice ..." << std::endl;
  viennacl::copy(ublas_A, vcl_range_A);
  viennacl::copy(ublas_B, vcl_B);
  viennacl::copy(ublas_C, vcl_slice_C);
  if (run_test(ublas_A, ublas_B, ublas_C,
               vcl_range_A, vcl_B, vcl_slice_C) != EXIT_SUCCESS)
  {
    return EXIT_FAILURE;
  }

  // A=range, B=range, C=matrix
  std::cout << "Testing A=range, B=range, C=matrix ..." << std::endl;
  viennacl::copy(ublas_A, vcl_range_A);
  viennacl::copy(ublas_B, vcl_range_B);
  viennacl::copy(ublas_C, vcl_C);
  if (run_test(ublas_A, ublas_B, ublas_C,
               vcl_range_A, vcl_range_B, vcl_C) != EXIT_SUCCESS)
  {
    return EXIT_FAILURE;
  }

  // A=range, B=range, C=range
  std::cout << "Testing A=range, B=range, C=range ..." << std::endl;
  viennacl::copy(ublas_A, vcl_range_A);
  viennacl::copy(ublas_B, vcl_range_B);
  viennacl::copy(ublas_C, vcl_range_C);
  if (run_test(ublas_A, ublas_B, ublas_C,
               vcl_range_A, vcl_range_B, vcl_range_C) != EXIT_SUCCESS)
  {
    return EXIT_FAILURE;
  }

  // A=range, B=range, C=slice
  std::cout << "Testing A=range, B=range, C=slice ..." << std::endl;
  viennacl::copy(ublas_A, vcl_range_A);
  viennacl::copy(ublas_B, vcl_range_B);
  viennacl::copy(ublas_C, vcl_slice_C);
  if (run_test(ublas_A, ublas_B, ublas_C,
               vcl_range_A, vcl_range_B, vcl_slice_C) != EXIT_SUCCESS)
  {
    return EXIT_FAILURE;
  }

  // A=range, B=slice, C=matrix
  std::cout << "Testing A=range, B=slice, C=matrix ..." << std::endl;
  viennacl::copy(ublas_A, vcl_range_A);
  viennacl::copy(ublas_B, vcl_slice_B);
  viennacl::copy(ublas_C, vcl_C);
  if (run_test(ublas_A, ublas_B, ublas_C,
               vcl_range_A, vcl_slice_B, vcl_C) != EXIT_SUCCESS)
  {
    return EXIT_FAILURE;
  }

  // A=range, B=slice, C=range (the statement is completed on the next line)
  std::cout << "Testing A=range, B=slice, C=range ..."
<< std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_slice_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=range, B=slice, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_range_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_range_A, vcl_slice_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } /////// A=slice: std::cout << "Testing A=slice, B=matrix, C=matrix ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=matrix, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=matrix, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=range, C=matrix ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_range_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=range, C=range ..." 
<< std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_range_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=range, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_range_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_range_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=slice, C=matrix ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_slice_B, vcl_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=slice, C=range ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_range_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_slice_B, vcl_range_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } std::cout << "Testing A=slice, B=slice, C=slice ..." << std::endl; viennacl::copy(ublas_A, vcl_slice_A); viennacl::copy(ublas_B, vcl_slice_B); viennacl::copy(ublas_C, vcl_slice_C); if (run_test(ublas_A, ublas_B, ublas_C, vcl_slice_A, vcl_slice_B, vcl_slice_C) != EXIT_SUCCESS) { return EXIT_FAILURE; } return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/global_variables.cpp000644 001750 001750 00000004665 12267307531 021726 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // *** System // #include #include #include // // *** ViennaCL // #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/compressed_matrix.hpp" #include "viennacl/coordinate_matrix.hpp" #include "viennacl/ell_matrix.hpp" #include "viennacl/hyb_matrix.hpp" #ifdef VIENNACL_WITH_OPENCL #include "viennacl/circulant_matrix.hpp" #include "viennacl/hankel_matrix.hpp" #include "viennacl/toeplitz_matrix.hpp" #include "viennacl/vandermonde_matrix.hpp" #endif viennacl::scalar s1; viennacl::scalar s2; viennacl::vector v1; viennacl::vector v2; viennacl::matrix m1; //viennacl::matrix m2; // TODO: Add checks for other types // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Instantiation of global variables" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; s1 = viennacl::scalar(1.0f); s2 = viennacl::scalar(1); v1 = viennacl::vector(5); v2 = viennacl::vector(5); m1 = viennacl::matrix(5, 4); //m2 = viennacl::matrix(5, 4); std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return EXIT_SUCCESS; } // // ------------------------------------------------------------- // ViennaCL-1.5.1-src/tests/src/libviennacl_blas1.cu000644 001750 001750 00000065224 12267307531 021631 
0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /* * * Testing the ViennaCL BLAS-like shared library * */ // include necessary system headers #include #include #include // Some helper functions for this tutorial: #include "viennacl.hpp" #include "viennacl/vector.hpp" template ScalarType diff(ScalarType const & s1, ScalarType const & s2) { if (s1 != s2) return (s1 - s2) / std::max(static_cast(std::fabs(static_cast(s1))), static_cast(std::fabs(static_cast(s2)))); return ScalarType(0); } template ScalarType diff(std::vector const & v1, ViennaCLVectorType const & vcl_vec) { std::vector v2_cpu(vcl_vec.size()); viennacl::backend::finish(); viennacl::copy(vcl_vec, v2_cpu); ScalarType inf_norm = 0; for (unsigned int i=0;i 0 ) v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); else v2_cpu[i] = 0.0; if (v2_cpu[i] > inf_norm) inf_norm = v2_cpu[i]; } return inf_norm; } template void check(T const & t, U const & u, EpsilonT eps) { EpsilonT rel_error = static_cast(diff(t,u)); if (rel_error > eps) { std::cerr << "Relative error: " << rel_error << std::endl; std::cerr << "Aborting!" 
<< std::endl; exit(EXIT_FAILURE); } std::cout << "SUCCESS "; } int main() { std::size_t size = 10; // at least 7 float eps_float = 1e-5f; double eps_double = 1e-12; float ref_float_alpha; double ref_double_alpha; std::vector ref_float_x(size, 1.0f); std::vector ref_float_y(size, 2.0f); std::vector ref_double_x(size, 1.0); std::vector ref_double_y(size, 2.0); ViennaCLBackend my_backend; ViennaCLBackendCreate(&my_backend); // Host setup float host_float_alpha = 0; viennacl::vector host_float_x = viennacl::scalar_vector(size, 1.0f, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::vector host_float_y = viennacl::scalar_vector(size, 2.0f, viennacl::context(viennacl::MAIN_MEMORY)); double host_double_alpha = 0; viennacl::vector host_double_x = viennacl::scalar_vector(size, 1.0, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::vector host_double_y = viennacl::scalar_vector(size, 2.0, viennacl::context(viennacl::MAIN_MEMORY)); // CUDA setup #ifdef VIENNACL_WITH_CUDA float cuda_float_alpha = 0; viennacl::vector cuda_float_x = viennacl::scalar_vector(size, 1.0f, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::vector cuda_float_y = viennacl::scalar_vector(size, 2.0f, viennacl::context(viennacl::CUDA_MEMORY)); double cuda_double_alpha = 0; viennacl::vector cuda_double_x = viennacl::scalar_vector(size, 1.0, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::vector cuda_double_y = viennacl::scalar_vector(size, 2.0, viennacl::context(viennacl::CUDA_MEMORY)); #endif // OpenCL setup #ifdef VIENNACL_WITH_OPENCL ViennaCLInt context_id = 0; float opencl_float_alpha = 0; viennacl::vector opencl_float_x = viennacl::scalar_vector(size, 1.0f, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::vector opencl_float_y = viennacl::scalar_vector(size, 2.0f, viennacl::context(viennacl::ocl::get_context(context_id))); double opencl_double_alpha = 0; viennacl::vector *opencl_double_x = NULL; viennacl::vector *opencl_double_y = NULL; if( 
viennacl::ocl::current_device().double_support() ) { opencl_double_x = new viennacl::vector(viennacl::scalar_vector(size, 1.0, viennacl::context(viennacl::ocl::get_context(context_id)))); opencl_double_y = new viennacl::vector(viennacl::scalar_vector(size, 2.0, viennacl::context(viennacl::ocl::get_context(context_id)))); } ViennaCLBackendSetOpenCLContextID(my_backend, context_id); #endif // consistency checks: check(ref_float_x, host_float_x, eps_float); check(ref_float_y, host_float_y, eps_float); check(ref_double_x, host_double_x, eps_double); check(ref_double_y, host_double_y, eps_double); #ifdef VIENNACL_WITH_CUDA check(ref_float_x, cuda_float_x, eps_float); check(ref_float_y, cuda_float_y, eps_float); check(ref_double_x, cuda_double_x, eps_double); check(ref_double_y, cuda_double_y, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL check(ref_float_x, opencl_float_x, eps_float); check(ref_float_y, opencl_float_y, eps_float); if( viennacl::ocl::current_device().double_support() ) { check(ref_double_x, *opencl_double_x, eps_double); check(ref_double_y, *opencl_double_y, eps_double); } #endif // ASUM std::cout << std::endl << "-- Testing xASUM..."; ref_float_alpha = 0; ref_double_alpha = 0; for (std::size_t i=0; i(host_float_x), 2, 3); check(ref_float_alpha, host_float_alpha, eps_float); ViennaCLHostDasum(my_backend, ViennaCLInt(size/4), &host_double_alpha, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_x), 2, 3); check(ref_double_alpha, host_double_alpha, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASasum(my_backend, ViennaCLInt(size/4), &cuda_float_alpha, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_x), 2, 3); check(ref_float_alpha, cuda_float_alpha, eps_float); ViennaCLCUDADasum(my_backend, ViennaCLInt(size/4), &cuda_double_alpha, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_x), 2, 3); check(ref_double_alpha, cuda_double_alpha, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout 
<< std::endl << "OpenCL: "; ViennaCLOpenCLSasum(my_backend, ViennaCLInt(size/4), &opencl_float_alpha, viennacl::traits::opencl_handle(opencl_float_x).get(), 2, 3); check(ref_float_alpha, opencl_float_alpha, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDasum(my_backend, ViennaCLInt(size/4), &opencl_double_alpha, viennacl::traits::opencl_handle(*opencl_double_x).get(), 2, 3); check(ref_double_alpha, opencl_double_alpha, eps_double); } #endif // AXPY std::cout << std::endl << "-- Testing xAXPY..."; for (std::size_t i=0; i(host_float_x), 0, 2, viennacl::linalg::host_based::detail::extract_raw_pointer(host_float_y), 1, 2); check(ref_float_x, host_float_x, eps_float); check(ref_float_y, host_float_y, eps_float); ViennaCLHostDaxpy(my_backend, ViennaCLInt(size/3), 2.0, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_x), 0, 2, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_y), 1, 2); check(ref_double_x, host_double_x, eps_double); check(ref_double_y, host_double_y, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASaxpy(my_backend, ViennaCLInt(size/3), 2.0f, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_x), 0, 2, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_y), 1, 2); check(ref_float_x, cuda_float_x, eps_float); check(ref_float_y, cuda_float_y, eps_float); ViennaCLCUDADaxpy(my_backend, ViennaCLInt(size/3), 2.0, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_x), 0, 2, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_y), 1, 2); check(ref_double_x, cuda_double_x, eps_double); check(ref_double_y, cuda_double_y, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSaxpy(my_backend, ViennaCLInt(size/3), 2.0f, viennacl::traits::opencl_handle(opencl_float_x).get(), 0, 2, viennacl::traits::opencl_handle(opencl_float_y).get(), 1, 2); check(ref_float_x, opencl_float_x, eps_float); 
check(ref_float_y, opencl_float_y, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDaxpy(my_backend, ViennaCLInt(size/3), 2.0, viennacl::traits::opencl_handle(*opencl_double_x).get(), 0, 2, viennacl::traits::opencl_handle(*opencl_double_y).get(), 1, 2); check(ref_double_x, *opencl_double_x, eps_double); check(ref_double_y, *opencl_double_y, eps_double); } #endif // COPY std::cout << std::endl << "-- Testing xCOPY..."; for (std::size_t i=0; i(host_float_x), 1, 2, viennacl::linalg::host_based::detail::extract_raw_pointer(host_float_y), 0, 2); check(ref_float_x, host_float_x, eps_float); check(ref_float_y, host_float_y, eps_float); ViennaCLHostDcopy(my_backend, ViennaCLInt(size/3), viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_x), 1, 2, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_y), 0, 2); check(ref_double_x, host_double_x, eps_double); check(ref_double_y, host_double_y, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDAScopy(my_backend, ViennaCLInt(size/3), viennacl::linalg::cuda::detail::cuda_arg(cuda_float_x), 1, 2, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_y), 0, 2); check(ref_float_x, cuda_float_x, eps_float); check(ref_float_y, cuda_float_y, eps_float); ViennaCLCUDADcopy(my_backend, ViennaCLInt(size/3), viennacl::linalg::cuda::detail::cuda_arg(cuda_double_x), 1, 2, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_y), 0, 2); check(ref_double_x, cuda_double_x, eps_double); check(ref_double_y, cuda_double_y, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLScopy(my_backend, ViennaCLInt(size/3), viennacl::traits::opencl_handle(opencl_float_x).get(), 1, 2, viennacl::traits::opencl_handle(opencl_float_y).get(), 0, 2); check(ref_float_x, opencl_float_x, eps_float); check(ref_float_y, opencl_float_y, eps_float); if( viennacl::ocl::current_device().double_support() ) { 
ViennaCLOpenCLDcopy(my_backend, ViennaCLInt(size/3), viennacl::traits::opencl_handle(*opencl_double_x).get(), 1, 2, viennacl::traits::opencl_handle(*opencl_double_y).get(), 0, 2); check(ref_double_x, *opencl_double_x, eps_double); check(ref_double_y, *opencl_double_y, eps_double); } #endif // DOT std::cout << std::endl << "-- Testing xDOT..."; ref_float_alpha = 0; ref_double_alpha = 0; for (std::size_t i=0; i(host_float_x), 2, 1, viennacl::linalg::host_based::detail::extract_raw_pointer(host_float_y), 3, 1); check(ref_float_alpha, host_float_alpha, eps_float); ViennaCLHostDdot(my_backend, ViennaCLInt(size/2), &host_double_alpha, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_x), 2, 1, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_y), 3, 1); check(ref_double_alpha, host_double_alpha, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASdot(my_backend, ViennaCLInt(size/2), &cuda_float_alpha, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_x), 2, 1, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_y), 3, 1); check(ref_float_alpha, cuda_float_alpha, eps_float); ViennaCLCUDADdot(my_backend, ViennaCLInt(size/2), &cuda_double_alpha, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_x), 2, 1, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_y), 3, 1); check(ref_double_alpha, cuda_double_alpha, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSdot(my_backend, ViennaCLInt(size/2), &opencl_float_alpha, viennacl::traits::opencl_handle(opencl_float_x).get(), 2, 1, viennacl::traits::opencl_handle(opencl_float_y).get(), 3, 1); check(ref_float_alpha, opencl_float_alpha, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDdot(my_backend, ViennaCLInt(size/2), &opencl_double_alpha, viennacl::traits::opencl_handle(*opencl_double_x).get(), 2, 1, viennacl::traits::opencl_handle(*opencl_double_y).get(), 3, 1); 
check(ref_double_alpha, opencl_double_alpha, eps_double); } #endif // NRM2 std::cout << std::endl << "-- Testing xNRM2..."; ref_float_alpha = 0; ref_double_alpha = 0; for (std::size_t i=0; i(host_float_x), 1, 2); check(ref_float_alpha, host_float_alpha, eps_float); ViennaCLHostDnrm2(my_backend, ViennaCLInt(size/3), &host_double_alpha, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_x), 1, 2); check(ref_double_alpha, host_double_alpha, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASnrm2(my_backend, ViennaCLInt(size/3), &cuda_float_alpha, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_x), 1, 2); check(ref_float_alpha, cuda_float_alpha, eps_float); ViennaCLCUDADnrm2(my_backend, ViennaCLInt(size/3), &cuda_double_alpha, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_x), 1, 2); check(ref_double_alpha, cuda_double_alpha, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSnrm2(my_backend, ViennaCLInt(size/3), &opencl_float_alpha, viennacl::traits::opencl_handle(opencl_float_x).get(), 1, 2); check(ref_float_alpha, opencl_float_alpha, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDnrm2(my_backend, ViennaCLInt(size/3), &opencl_double_alpha, viennacl::traits::opencl_handle(*opencl_double_x).get(), 1, 2); check(ref_double_alpha, opencl_double_alpha, eps_double); } #endif // ROT std::cout << std::endl << "-- Testing xROT..."; for (std::size_t i=0; i(host_float_x), 2, 3, viennacl::linalg::host_based::detail::extract_raw_pointer(host_float_y), 1, 2, 0.6f, 0.8f); check(ref_float_x, host_float_x, eps_float); check(ref_float_y, host_float_y, eps_float); ViennaCLHostDrot(my_backend, ViennaCLInt(size/4), viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_x), 2, 3, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_y), 1, 2, 0.6, 0.8); check(ref_double_x, host_double_x, eps_double); 
check(ref_double_y, host_double_y, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASrot(my_backend, ViennaCLInt(size/4), viennacl::linalg::cuda::detail::cuda_arg(cuda_float_x), 2, 3, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_y), 1, 2, 0.6f, 0.8f); check(ref_float_x, cuda_float_x, eps_float); check(ref_float_y, cuda_float_y, eps_float); ViennaCLCUDADrot(my_backend, ViennaCLInt(size/4), viennacl::linalg::cuda::detail::cuda_arg(cuda_double_x), 2, 3, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_y), 1, 2, 0.6, 0.8); check(ref_double_x, cuda_double_x, eps_double); check(ref_double_y, cuda_double_y, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSrot(my_backend, ViennaCLInt(size/4), viennacl::traits::opencl_handle(opencl_float_x).get(), 2, 3, viennacl::traits::opencl_handle(opencl_float_y).get(), 1, 2, 0.6f, 0.8f); check(ref_float_x, opencl_float_x, eps_float); check(ref_float_y, opencl_float_y, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDrot(my_backend, ViennaCLInt(size/4), viennacl::traits::opencl_handle(*opencl_double_x).get(), 2, 3, viennacl::traits::opencl_handle(*opencl_double_y).get(), 1, 2, 0.6, 0.8); check(ref_double_x, *opencl_double_x, eps_double); check(ref_double_y, *opencl_double_y, eps_double); } #endif // SCAL std::cout << std::endl << "-- Testing xSCAL..."; for (std::size_t i=0; i(host_float_x), 1, 3); check(ref_float_x, host_float_x, eps_float); ViennaCLHostDscal(my_backend, ViennaCLInt(size/4), 2.0, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_x), 1, 3); check(ref_double_x, host_double_x, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASscal(my_backend, ViennaCLInt(size/4), 2.0f, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_x), 1, 3); check(ref_float_x, cuda_float_x, eps_float); ViennaCLCUDADscal(my_backend, ViennaCLInt(size/4), 2.0, 
viennacl::linalg::cuda::detail::cuda_arg(cuda_double_x), 1, 3); check(ref_double_x, cuda_double_x, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSscal(my_backend, ViennaCLInt(size/4), 2.0f, viennacl::traits::opencl_handle(opencl_float_x).get(), 1, 3); check(ref_float_x, opencl_float_x, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDscal(my_backend, ViennaCLInt(size/4), 2.0, viennacl::traits::opencl_handle(*opencl_double_x).get(), 1, 3); check(ref_double_x, *opencl_double_x, eps_double); } #endif // SWAP std::cout << std::endl << "-- Testing xSWAP..."; for (std::size_t i=0; i(host_float_x), 2, 2, viennacl::linalg::host_based::detail::extract_raw_pointer(host_float_y), 1, 2); check(ref_float_y, host_float_y, eps_float); ViennaCLHostDswap(my_backend, ViennaCLInt(size/3), viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_x), 2, 2, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_y), 1, 2); check(ref_double_y, host_double_y, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASswap(my_backend, ViennaCLInt(size/3), viennacl::linalg::cuda::detail::cuda_arg(cuda_float_x), 2, 2, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_y), 1, 2); check(ref_float_y, cuda_float_y, eps_float); ViennaCLCUDADswap(my_backend, ViennaCLInt(size/3), viennacl::linalg::cuda::detail::cuda_arg(cuda_double_x), 2, 2, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_y), 1, 2); check(ref_double_y, cuda_double_y, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSswap(my_backend, ViennaCLInt(size/3), viennacl::traits::opencl_handle(opencl_float_x).get(), 2, 2, viennacl::traits::opencl_handle(opencl_float_y).get(), 1, 2); check(ref_float_y, opencl_float_y, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDswap(my_backend, ViennaCLInt(size/3), 
viennacl::traits::opencl_handle(*opencl_double_x).get(), 2, 2, viennacl::traits::opencl_handle(*opencl_double_y).get(), 1, 2); check(ref_double_y, *opencl_double_y, eps_double); } #endif // IAMAX std::cout << std::endl << "-- Testing IxASUM..."; ViennaCLInt ref_index = 0; ref_float_alpha = 0; for (std::size_t i=0; i std::fabs(ref_float_alpha)) { ref_index = ViennaCLInt(i); ref_float_alpha = std::fabs(ref_float_x[0 + 2*i]); } } std::cout << std::endl << "Host: "; ViennaCLInt idx = 0; ViennaCLHostiSamax(my_backend, ViennaCLInt(size/3), &idx, viennacl::linalg::host_based::detail::extract_raw_pointer(host_float_x), 0, 2); check(static_cast(ref_index), static_cast(idx), eps_float); idx = 0; ViennaCLHostiDamax(my_backend, ViennaCLInt(size/3), &idx, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_x), 0, 2); check(ref_index, idx, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; idx = 0; ViennaCLCUDAiSamax(my_backend, ViennaCLInt(size/3), &idx, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_x), 0, 2); check(ref_float_x[2*ref_index], ref_float_x[2*idx], eps_float); idx = 0; ViennaCLCUDAiDamax(my_backend, ViennaCLInt(size/3), &idx, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_x), 0, 2); check(ref_double_x[2*ref_index], ref_double_x[2*idx], eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; idx = 0; ViennaCLOpenCLiSamax(my_backend, ViennaCLInt(size/3), &idx, viennacl::traits::opencl_handle(opencl_float_x).get(), 0, 2); check(ref_float_x[2*ref_index], ref_float_x[2*idx], eps_float); idx = 0; if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLiDamax(my_backend, ViennaCLInt(size/3), &idx, viennacl::traits::opencl_handle(*opencl_double_x).get(), 0, 2); check(ref_double_x[2*ref_index], ref_double_x[2*idx], eps_double); } #endif #ifdef VIENNACL_WITH_OPENCL //cleanup if( viennacl::ocl::current_device().double_support() ) { delete opencl_double_x; delete opencl_double_y; 
} #endif ViennaCLBackendDestroy(&my_backend); // // That's it. // std::cout << std::endl << "!!!! TEST COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/sparse.cu000644 001750 001750 00000121772 12267307531 017557 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #ifndef NDEBUG #define NDEBUG #endif // // *** System // #include // // *** Boost // #include #include #include #include #include #include #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL #define VIENNACL_WITH_UBLAS 1 #include "viennacl/scalar.hpp" #include "viennacl/compressed_matrix.hpp" #include "viennacl/compressed_compressed_matrix.hpp" #include "viennacl/coordinate_matrix.hpp" #include "viennacl/ell_matrix.hpp" #include "viennacl/hyb_matrix.hpp" #include "viennacl/vector.hpp" #include "viennacl/vector_proxy.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/ilu.hpp" #include "viennacl/linalg/detail/ilu/common.hpp" #include "viennacl/io/matrix_market.hpp" #include "examples/tutorial/Random.hpp" #include "examples/tutorial/vector-io.hpp" // // ------------------------------------------------------------- // using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType & s1, viennacl::scalar & s2) { if (s1 != s2) return (s1 - s2) / 
std::max(fabs(s1), std::fabs(s2)); return 0; } template ScalarType diff(ublas::vector & v1, viennacl::vector & v2) { ublas::vector v2_cpu(v2.size()); viennacl::backend::finish(); viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin()); for (unsigned int i=0;i 0 ) { //if (std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ) < 1e-10 ) //absolute tolerance (avoid round-off issues) // v2_cpu[i] = 0; //else v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); } else v2_cpu[i] = 0.0; if (v2_cpu[i] > 0.0001) { //std::cout << "Neighbor: " << i-1 << ": " << v1[i-1] << " vs. " << v2_cpu[i-1] << std::endl; std::cout << "Error at entry " << i << ": " << v1[i] << " vs. " << v2_cpu[i] << std::endl; //std::cout << "Neighbor: " << i+1 << ": " << v1[i+1] << " vs. " << v2_cpu[i+1] << std::endl; exit(EXIT_FAILURE); } } return norm_inf(v2_cpu); } template ScalarType diff(ublas::compressed_matrix & cpu_matrix, VCL_MATRIX & gpu_matrix) { typedef ublas::compressed_matrix CPU_MATRIX; CPU_MATRIX from_gpu(gpu_matrix.size1(), gpu_matrix.size2()); viennacl::backend::finish(); viennacl::copy(gpu_matrix, from_gpu); ScalarType error = 0; //step 1: compare all entries from cpu_matrix with gpu_matrix: //std::cout << "Ublas matrix: " << std::endl; for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) { //std::cout << "Row " << row_it.index1() << ": " << std::endl; for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) { //std::cout << "(" << col_it.index2() << ", " << *col_it << std::endl; ScalarType current_error = 0; if ( std::max( std::fabs(cpu_matrix(col_it.index1(), col_it.index2())), std::fabs(from_gpu(col_it.index1(), col_it.index2())) ) > 0 ) current_error = std::fabs(cpu_matrix(col_it.index1(), col_it.index2()) - from_gpu(col_it.index1(), col_it.index2())) / std::max( std::fabs(cpu_matrix(col_it.index1(), col_it.index2())), 
std::fabs(from_gpu(col_it.index1(), col_it.index2())) ); if (current_error > error) error = current_error; } } //step 2: compare all entries from gpu_matrix with cpu_matrix (sparsity pattern might differ): //std::cout << "ViennaCL matrix: " << std::endl; for (typename CPU_MATRIX::const_iterator1 row_it = from_gpu.begin1(); row_it != from_gpu.end1(); ++row_it) { //std::cout << "Row " << row_it.index1() << ": " << std::endl; for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) { //std::cout << "(" << col_it.index2() << ", " << *col_it << std::endl; ScalarType current_error = 0; if ( std::max( std::fabs(cpu_matrix(col_it.index1(), col_it.index2())), std::fabs(from_gpu(col_it.index1(), col_it.index2())) ) > 0 ) current_error = std::fabs(cpu_matrix(col_it.index1(), col_it.index2()) - from_gpu(col_it.index1(), col_it.index2())) / std::max( std::fabs(cpu_matrix(col_it.index1(), col_it.index2())), std::fabs(from_gpu(col_it.index1(), col_it.index2())) ); if (current_error > error) error = current_error; } } return error; } template int strided_matrix_vector_product_test(Epsilon epsilon, UblasVectorT & result, UblasVectorT const & rhs, VCLVectorT & vcl_result, VCLVectorT & vcl_rhs) { int retval = EXIT_SUCCESS; ublas::compressed_matrix ublas_matrix2(5, 4); ublas_matrix2(0, 0) = NumericT(2.0); ublas_matrix2(0, 2) = NumericT(-1.0); ublas_matrix2(1, 0) = NumericT(3.0); ublas_matrix2(1, 2) = NumericT(-5.0); ublas_matrix2(2, 1) = NumericT(5.0); ublas_matrix2(2, 2) = NumericT(-2.0); ublas_matrix2(3, 2) = NumericT(1.0); ublas_matrix2(3, 3) = NumericT(-6.0); ublas_matrix2(4, 1) = NumericT(7.0); ublas_matrix2(4, 2) = NumericT(-5.0); project(result, ublas::slice(1, 3, 5)) = ublas::prod(ublas_matrix2, project(rhs, ublas::slice(3, 2, 4))); VCL_MatrixT vcl_sparse_matrix2; viennacl::copy(ublas_matrix2, vcl_sparse_matrix2); viennacl::vector vec(4); vec(0) = rhs(3); vec(1) = rhs(5); vec(2) = rhs(7); vec(3) = rhs(9); 
viennacl::project(vcl_result, viennacl::slice(1, 3, 5)) = viennacl::linalg::prod(vcl_sparse_matrix2, viennacl::project(vcl_rhs, viennacl::slice(3, 2, 4))); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with stided vectors, part 1" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } vcl_result(1) = NumericT(1.0); vcl_result(4) = NumericT(1.0); vcl_result(7) = NumericT(1.0); vcl_result(10) = NumericT(1.0); vcl_result(13) = NumericT(1.0); viennacl::project(vcl_result, viennacl::slice(1, 3, 5)) = viennacl::linalg::prod(vcl_sparse_matrix2, vec); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with strided vectors, part 2" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } return retval; } template< typename NumericT, typename VCL_MATRIX, typename Epsilon > int resize_test(Epsilon const& epsilon) { int retval = EXIT_SUCCESS; ublas::compressed_matrix ublas_matrix(5,5); VCL_MATRIX vcl_matrix; ublas_matrix(0,0) = NumericT(10.0); ublas_matrix(0, 1) = NumericT(0.1); ublas_matrix(0, 2) = NumericT(0.2); ublas_matrix(0, 3) = NumericT(0.3); ublas_matrix(0, 4) = NumericT(0.4); ublas_matrix(1,0) = NumericT(1.0); ublas_matrix(1, 1) = NumericT(1.1); ublas_matrix(1, 2) = NumericT(1.2); ublas_matrix(1, 3) = NumericT(1.3); ublas_matrix(1, 4) = NumericT(1.4); ublas_matrix(2,0) = NumericT(2.0); ublas_matrix(2, 1) = NumericT(2.1); ublas_matrix(2, 2) = NumericT(2.2); ublas_matrix(2, 3) = NumericT(2.3); ublas_matrix(2, 4) = NumericT(2.4); ublas_matrix(3,0) = NumericT(3.0); ublas_matrix(3, 1) = NumericT(3.1); ublas_matrix(3, 2) = NumericT(3.2); ublas_matrix(3, 3) = NumericT(3.3); ublas_matrix(3, 4) = NumericT(3.4); ublas_matrix(4,0) = NumericT(4.0); ublas_matrix(4, 1) = NumericT(4.1); ublas_matrix(4, 2) = NumericT(4.2); ublas_matrix(4, 3) = 
NumericT(4.3); ublas_matrix(4, 4) = NumericT(4.4); viennacl::copy(ublas_matrix, vcl_matrix); ublas::compressed_matrix other_matrix(ublas_matrix.size1(), ublas_matrix.size2()); viennacl::copy(vcl_matrix, other_matrix); std::cout << "Checking for equality after copy..." << std::endl; if( std::fabs(diff(ublas_matrix, vcl_matrix)) > epsilon ) { std::cout << "# Error at operation: equality after copy with sparse matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_matrix, vcl_matrix)) << std::endl; return EXIT_FAILURE; } std::cout << "Testing resize to larger..." << std::endl; ublas_matrix.resize(10, 10, false); //ublas does not allow preserve = true here ublas_matrix(0,0) = NumericT(10.0); ublas_matrix(0, 1) = NumericT(0.1); ublas_matrix(0, 2) = NumericT(0.2); ublas_matrix(0, 3) = NumericT(0.3); ublas_matrix(0, 4) = NumericT(0.4); ublas_matrix(1,0) = NumericT( 1.0); ublas_matrix(1, 1) = NumericT(1.1); ublas_matrix(1, 2) = NumericT(1.2); ublas_matrix(1, 3) = NumericT(1.3); ublas_matrix(1, 4) = NumericT(1.4); ublas_matrix(2,0) = NumericT( 2.0); ublas_matrix(2, 1) = NumericT(2.1); ublas_matrix(2, 2) = NumericT(2.2); ublas_matrix(2, 3) = NumericT(2.3); ublas_matrix(2, 4) = NumericT(2.4); ublas_matrix(3,0) = NumericT( 3.0); ublas_matrix(3, 1) = NumericT(3.1); ublas_matrix(3, 2) = NumericT(3.2); ublas_matrix(3, 3) = NumericT(3.3); ublas_matrix(3, 4) = NumericT(3.4); ublas_matrix(4,0) = NumericT( 4.0); ublas_matrix(4, 1) = NumericT(4.1); ublas_matrix(4, 2) = NumericT(4.2); ublas_matrix(4, 3) = NumericT(4.3); ublas_matrix(4, 4) = NumericT(4.4); //std::cout << ublas_matrix << std::endl; vcl_matrix.resize(10, 10, true); if( std::fabs(diff(ublas_matrix, vcl_matrix)) > epsilon ) { std::cout << "# Error at operation: resize (to larger) with sparse matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_matrix, vcl_matrix)) << std::endl; return EXIT_FAILURE; } ublas_matrix(5,5) = NumericT(5.5); ublas_matrix(5, 6) = NumericT(5.6); ublas_matrix(5, 7) = 
NumericT(5.7); ublas_matrix(5, 8) = NumericT(5.8); ublas_matrix(5, 9) = NumericT(5.9); ublas_matrix(6,5) = NumericT(6.5); ublas_matrix(6, 6) = NumericT(6.6); ublas_matrix(6, 7) = NumericT(6.7); ublas_matrix(6, 8) = NumericT(6.8); ublas_matrix(6, 9) = NumericT(6.9); ublas_matrix(7,5) = NumericT(7.5); ublas_matrix(7, 6) = NumericT(7.6); ublas_matrix(7, 7) = NumericT(7.7); ublas_matrix(7, 8) = NumericT(7.8); ublas_matrix(7, 9) = NumericT(7.9); ublas_matrix(8,5) = NumericT(8.5); ublas_matrix(8, 6) = NumericT(8.6); ublas_matrix(8, 7) = NumericT(8.7); ublas_matrix(8, 8) = NumericT(8.8); ublas_matrix(8, 9) = NumericT(8.9); ublas_matrix(9,5) = NumericT(9.5); ublas_matrix(9, 6) = NumericT(9.6); ublas_matrix(9, 7) = NumericT(9.7); ublas_matrix(9, 8) = NumericT(9.8); ublas_matrix(9, 9) = NumericT(9.9); viennacl::copy(ublas_matrix, vcl_matrix); std::cout << "Testing resize to smaller..." << std::endl; ublas_matrix.resize(7, 7, false); //ublas does not allow preserve = true here ublas_matrix(0,0) = NumericT(10.0); ublas_matrix(0, 1) = NumericT(0.1); ublas_matrix(0, 2) = NumericT(0.2); ublas_matrix(0, 3) = NumericT(0.3); ublas_matrix(0, 4) = NumericT(0.4); ublas_matrix(1,0) = NumericT( 1.0); ublas_matrix(1, 1) = NumericT(1.1); ublas_matrix(1, 2) = NumericT(1.2); ublas_matrix(1, 3) = NumericT(1.3); ublas_matrix(1, 4) = NumericT(1.4); ublas_matrix(2,0) = NumericT( 2.0); ublas_matrix(2, 1) = NumericT(2.1); ublas_matrix(2, 2) = NumericT(2.2); ublas_matrix(2, 3) = NumericT(2.3); ublas_matrix(2, 4) = NumericT(2.4); ublas_matrix(3,0) = NumericT( 3.0); ublas_matrix(3, 1) = NumericT(3.1); ublas_matrix(3, 2) = NumericT(3.2); ublas_matrix(3, 3) = NumericT(3.3); ublas_matrix(3, 4) = NumericT(3.4); ublas_matrix(4,0) = NumericT( 4.0); ublas_matrix(4, 1) = NumericT(4.1); ublas_matrix(4, 2) = NumericT(4.2); ublas_matrix(4, 3) = NumericT(4.3); ublas_matrix(4, 4) = NumericT(4.4); ublas_matrix(5,5) = NumericT( 5.5); ublas_matrix(5, 6) = NumericT(5.6); ublas_matrix(5, 7) = NumericT(5.7); 
ublas_matrix(5, 8) = NumericT(5.8); ublas_matrix(5, 9) = NumericT(5.9); ublas_matrix(6,5) = NumericT( 6.5); ublas_matrix(6, 6) = NumericT(6.6); ublas_matrix(6, 7) = NumericT(6.7); ublas_matrix(6, 8) = NumericT(6.8); ublas_matrix(6, 9) = NumericT(6.9); vcl_matrix.resize(7, 7); //std::cout << ublas_matrix << std::endl; if( std::fabs(diff(ublas_matrix, vcl_matrix)) > epsilon ) { std::cout << "# Error at operation: resize (to smaller) with sparse matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_matrix, vcl_matrix)) << std::endl; retval = EXIT_FAILURE; } ublas::vector ublas_vec = ublas::scalar_vector(ublas_matrix.size1(), NumericT(3.1415)); viennacl::vector vcl_vec(ublas_matrix.size1()); std::cout << "Testing transposed unit lower triangular solve: compressed_matrix" << std::endl; viennacl::copy(ublas_vec, vcl_vec); std::cout << "matrix: " << ublas_matrix << std::endl; std::cout << "vector: " << ublas_vec << std::endl; std::cout << "ViennaCL matrix size: " << vcl_matrix.size1() << " x " << vcl_matrix.size2() << std::endl; std::cout << "ublas..." << std::endl; boost::numeric::ublas::inplace_solve((ublas_matrix), ublas_vec, boost::numeric::ublas::unit_lower_tag()); std::cout << "ViennaCL..." 
<< std::endl; viennacl::linalg::inplace_solve((vcl_matrix), vcl_vec, viennacl::linalg::unit_lower_tag()); /* std::list< viennacl::backend::mem_handle > multifrontal_L_row_index_arrays_; std::list< viennacl::backend::mem_handle > multifrontal_L_row_buffers_; std::list< viennacl::backend::mem_handle > multifrontal_L_col_buffers_; std::list< viennacl::backend::mem_handle > multifrontal_L_element_buffers_; std::list< std::size_t > multifrontal_L_row_elimination_num_list_; viennacl::vector multifrontal_U_diagonal_; viennacl::linalg::detail::multifrontal_setup_L(vcl_matrix, multifrontal_U_diagonal_, //dummy multifrontal_L_row_index_arrays_, multifrontal_L_row_buffers_, multifrontal_L_col_buffers_, multifrontal_L_element_buffers_, multifrontal_L_row_elimination_num_list_); viennacl::linalg::detail::multifrontal_substitute(vcl_vec, multifrontal_L_row_index_arrays_, multifrontal_L_row_buffers_, multifrontal_L_col_buffers_, multifrontal_L_element_buffers_, multifrontal_L_row_elimination_num_list_); std::cout << "ublas..." << std::endl; boost::numeric::ublas::inplace_solve((ublas_matrix), ublas_vec, boost::numeric::ublas::upper_tag()); std::cout << "ViennaCL..." 
<< std::endl; std::list< viennacl::backend::mem_handle > multifrontal_U_row_index_arrays_; std::list< viennacl::backend::mem_handle > multifrontal_U_row_buffers_; std::list< viennacl::backend::mem_handle > multifrontal_U_col_buffers_; std::list< viennacl::backend::mem_handle > multifrontal_U_element_buffers_; std::list< std::size_t > multifrontal_U_row_elimination_num_list_; multifrontal_U_diagonal_.resize(vcl_matrix.size1(), false); viennacl::linalg::single_threaded::detail::row_info(vcl_matrix, multifrontal_U_diagonal_, viennacl::linalg::detail::SPARSE_ROW_DIAGONAL); viennacl::linalg::detail::multifrontal_setup_U(vcl_matrix, multifrontal_U_diagonal_, multifrontal_U_row_index_arrays_, multifrontal_U_row_buffers_, multifrontal_U_col_buffers_, multifrontal_U_element_buffers_, multifrontal_U_row_elimination_num_list_); vcl_vec = viennacl::linalg::element_div(vcl_vec, multifrontal_U_diagonal_); viennacl::linalg::detail::multifrontal_substitute(vcl_vec, multifrontal_U_row_index_arrays_, multifrontal_U_row_buffers_, multifrontal_U_col_buffers_, multifrontal_U_element_buffers_, multifrontal_U_row_elimination_num_list_); */ for (std::size_t i=0; i int test(Epsilon const& epsilon) { std::cout << "Testing resizing of compressed_matrix..." << std::endl; int retval = resize_test >(epsilon); if (retval != EXIT_SUCCESS) return retval; std::cout << "Testing resizing of coordinate_matrix..." 
<< std::endl; //if (retval != EXIT_FAILURE) // retval = resize_test >(epsilon); //else // return retval; // -------------------------------------------------------------------------- ublas::vector rhs; ublas::vector result; ublas::compressed_matrix ublas_matrix; if (viennacl::io::read_matrix_market_file(ublas_matrix, "../../examples/testdata/mat65k.mtx") == EXIT_FAILURE) { std::cout << "Error reading Matrix file" << std::endl; return EXIT_FAILURE; } //unsigned int cg_mat_size = cg_mat.size(); std::cout << "done reading matrix" << std::endl; rhs.resize(ublas_matrix.size2()); for (std::size_t i=0; i(); } // add some random numbers to the double-compressed matrix: ublas::compressed_matrix ublas_cc_matrix(ublas_matrix.size1(), ublas_matrix.size2()); ublas_cc_matrix(42,199) = NumericT(3.1415); ublas_cc_matrix(31, 69) = NumericT(2.71); ublas_cc_matrix(23, 32) = NumericT(6); ublas_cc_matrix(177,57) = NumericT(4); ublas_cc_matrix(21, 97) = NumericT(-4); ublas_cc_matrix(92, 25) = NumericT(2); ublas_cc_matrix(89, 62) = NumericT(11); ublas_cc_matrix(1, 7) = NumericT(8); ublas_cc_matrix(85, 41) = NumericT(13); ublas_cc_matrix(66, 28) = NumericT(8); ublas_cc_matrix(21, 74) = NumericT(-2); result = rhs; viennacl::vector vcl_rhs(rhs.size()); viennacl::vector vcl_result(result.size()); viennacl::vector vcl_result2(result.size()); viennacl::compressed_matrix vcl_compressed_matrix(rhs.size(), rhs.size()); viennacl::compressed_compressed_matrix vcl_compressed_compressed_matrix(rhs.size(), rhs.size()); viennacl::coordinate_matrix vcl_coordinate_matrix(rhs.size(), rhs.size()); viennacl::ell_matrix vcl_ell_matrix; viennacl::hyb_matrix vcl_hyb_matrix; viennacl::copy(rhs.begin(), rhs.end(), vcl_rhs.begin()); viennacl::copy(ublas_matrix, vcl_compressed_matrix); viennacl::copy(ublas_cc_matrix, vcl_compressed_compressed_matrix); viennacl::copy(ublas_matrix, vcl_coordinate_matrix); // -------------------------------------------------------------------------- std::cout << "Testing products: 
ublas" << std::endl; result = viennacl::linalg::prod(ublas_matrix, rhs); std::cout << "Testing products: compressed_matrix" << std::endl; vcl_result = viennacl::linalg::prod(vcl_compressed_matrix, vcl_rhs); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing products: compressed_matrix, strided vectors" << std::endl; retval = strided_matrix_vector_product_test >(epsilon, result, rhs, vcl_result, vcl_rhs); if (retval != EXIT_SUCCESS) return retval; // // Triangular solvers for A \ b: // ublas::compressed_matrix ublas_matrix_trans(ublas_matrix.size2(), ublas_matrix.size1(), ublas_matrix.nnz()); // = trans(ublas_matrix); //note: triangular solvers with uBLAS show atrocious performance, while transposed solvers are quite okay. To keep execution times short, we use a double-transpose-trick in the following. 
// fast transpose: for (typename ublas::compressed_matrix::iterator1 row_it = ublas_matrix.begin1(); row_it != ublas_matrix.end1(); ++row_it) { for (typename ublas::compressed_matrix::iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) { ublas_matrix_trans(col_it.index1(), col_it.index2()) = *col_it; } } std::cout << "Testing unit upper triangular solve: compressed_matrix" << std::endl; result = rhs; viennacl::copy(result, vcl_result); boost::numeric::ublas::inplace_solve(trans(ublas_matrix_trans), result, boost::numeric::ublas::unit_upper_tag()); viennacl::linalg::inplace_solve(vcl_compressed_matrix, vcl_result, viennacl::linalg::unit_upper_tag()); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: unit upper triangular solve with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing upper triangular solve: compressed_matrix" << std::endl; result = rhs; viennacl::copy(result, vcl_result); boost::numeric::ublas::inplace_solve(trans(ublas_matrix_trans), result, boost::numeric::ublas::upper_tag()); viennacl::linalg::inplace_solve(vcl_compressed_matrix, vcl_result, viennacl::linalg::upper_tag()); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: upper triangular solve with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing unit lower triangular solve: compressed_matrix" << std::endl; result = rhs; viennacl::copy(result, vcl_result); boost::numeric::ublas::inplace_solve(trans(ublas_matrix_trans), result, boost::numeric::ublas::unit_lower_tag()); viennacl::linalg::inplace_solve(vcl_compressed_matrix, vcl_result, viennacl::linalg::unit_lower_tag()); /*std::list< viennacl::backend::mem_handle > multifrontal_L_row_index_arrays_; std::list< viennacl::backend::mem_handle > 
multifrontal_L_row_buffers_; std::list< viennacl::backend::mem_handle > multifrontal_L_col_buffers_; std::list< viennacl::backend::mem_handle > multifrontal_L_element_buffers_; std::list< std::size_t > multifrontal_L_row_elimination_num_list_; viennacl::vector multifrontal_U_diagonal_; viennacl::switch_memory_domain(multifrontal_U_diagonal_, viennacl::MAIN_MEMORY); multifrontal_U_diagonal_.resize(vcl_compressed_matrix.size1(), false); viennacl::linalg::single_threaded::detail::row_info(vcl_compressed_matrix, multifrontal_U_diagonal_, viennacl::linalg::detail::SPARSE_ROW_DIAGONAL); viennacl::linalg::detail::multifrontal_setup_L(vcl_compressed_matrix, multifrontal_U_diagonal_, //dummy multifrontal_L_row_index_arrays_, multifrontal_L_row_buffers_, multifrontal_L_col_buffers_, multifrontal_L_element_buffers_, multifrontal_L_row_elimination_num_list_); viennacl::linalg::detail::multifrontal_substitute(vcl_result, multifrontal_L_row_index_arrays_, multifrontal_L_row_buffers_, multifrontal_L_col_buffers_, multifrontal_L_element_buffers_, multifrontal_L_row_elimination_num_list_);*/ if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: unit lower triangular solve with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing lower triangular solve: compressed_matrix" << std::endl; result = rhs; viennacl::copy(result, vcl_result); boost::numeric::ublas::inplace_solve(trans(ublas_matrix_trans), result, boost::numeric::ublas::lower_tag()); viennacl::linalg::inplace_solve(vcl_compressed_matrix, vcl_result, viennacl::linalg::lower_tag()); /*std::list< viennacl::backend::mem_handle > multifrontal_U_row_index_arrays_; std::list< viennacl::backend::mem_handle > multifrontal_U_row_buffers_; std::list< viennacl::backend::mem_handle > multifrontal_U_col_buffers_; std::list< viennacl::backend::mem_handle > multifrontal_U_element_buffers_; std::list< 
std::size_t > multifrontal_U_row_elimination_num_list_; multifrontal_U_diagonal_.resize(vcl_compressed_matrix.size1(), false); viennacl::linalg::single_threaded::detail::row_info(vcl_compressed_matrix, multifrontal_U_diagonal_, viennacl::linalg::detail::SPARSE_ROW_DIAGONAL); viennacl::linalg::detail::multifrontal_setup_U(vcl_compressed_matrix, multifrontal_U_diagonal_, multifrontal_U_row_index_arrays_, multifrontal_U_row_buffers_, multifrontal_U_col_buffers_, multifrontal_U_element_buffers_, multifrontal_U_row_elimination_num_list_); vcl_result = viennacl::linalg::element_div(vcl_result, multifrontal_U_diagonal_); viennacl::linalg::detail::multifrontal_substitute(vcl_result, multifrontal_U_row_index_arrays_, multifrontal_U_row_buffers_, multifrontal_U_col_buffers_, multifrontal_U_element_buffers_, multifrontal_U_row_elimination_num_list_);*/ if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: lower triangular solve with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } /* std::cout << "Testing lower triangular solve: compressed_matrix" << std::endl; result = rhs; viennacl::copy(result, vcl_result); boost::numeric::ublas::inplace_solve(ublas_matrix, result, boost::numeric::ublas::lower_tag()); viennacl::linalg::inplace_solve(vcl_compressed_matrix, vcl_result, viennacl::linalg::lower_tag()); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: lower triangular solve with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; }*/ // // Triangular solvers for A^T \ b // std::cout << "Testing transposed unit upper triangular solve: compressed_matrix" << std::endl; result = rhs; viennacl::copy(result, vcl_result); boost::numeric::ublas::inplace_solve(trans(ublas_matrix), result, boost::numeric::ublas::unit_upper_tag()); 
viennacl::linalg::inplace_solve(trans(vcl_compressed_matrix), vcl_result, viennacl::linalg::unit_upper_tag()); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: unit upper triangular solve with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing transposed upper triangular solve: compressed_matrix" << std::endl; result = rhs; viennacl::copy(result, vcl_result); boost::numeric::ublas::inplace_solve(trans(ublas_matrix), result, boost::numeric::ublas::upper_tag()); viennacl::linalg::inplace_solve(trans(vcl_compressed_matrix), vcl_result, viennacl::linalg::upper_tag()); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: upper triangular solve with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing transposed unit lower triangular solve: compressed_matrix" << std::endl; result = rhs; viennacl::copy(result, vcl_result); boost::numeric::ublas::inplace_solve(trans(ublas_matrix), result, boost::numeric::ublas::unit_lower_tag()); viennacl::linalg::inplace_solve(trans(vcl_compressed_matrix), vcl_result, viennacl::linalg::unit_lower_tag()); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: unit lower triangular solve with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing transposed lower triangular solve: compressed_matrix" << std::endl; result = rhs; viennacl::copy(result, vcl_result); boost::numeric::ublas::inplace_solve(trans(ublas_matrix), result, boost::numeric::ublas::lower_tag()); viennacl::linalg::inplace_solve(trans(vcl_compressed_matrix), vcl_result, viennacl::linalg::lower_tag()); if( std::fabs(diff(result, vcl_result)) > epsilon ) { 
std::cout << "# Error at operation: lower triangular solve with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing products: compressed_compressed_matrix" << std::endl; result = viennacl::linalg::prod(ublas_cc_matrix, rhs); vcl_result = viennacl::linalg::prod(vcl_compressed_compressed_matrix, vcl_rhs); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with compressed_compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } { ublas::compressed_matrix temp(vcl_compressed_compressed_matrix.size1(), vcl_compressed_compressed_matrix.size2()); viennacl::copy(vcl_compressed_compressed_matrix, temp); // check that entries are correct by computing the product again: result = viennacl::linalg::prod(temp, rhs); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with compressed_compressed_matrix (after copy back)" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } } std::cout << "Testing products: coordinate_matrix" << std::endl; result = viennacl::linalg::prod(ublas_matrix, rhs); vcl_result = viennacl::linalg::prod(vcl_coordinate_matrix, vcl_rhs); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with coordinate_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing products: coordinate_matrix, strided vectors" << std::endl; //std::cout << " --> SKIPPING <--" << std::endl; retval = strided_matrix_vector_product_test >(epsilon, result, rhs, vcl_result, vcl_rhs); if (retval != EXIT_SUCCESS) return retval; //std::cout << "Copying ell_matrix" << std::endl; 
viennacl::copy(ublas_matrix, vcl_ell_matrix); ublas_matrix.clear(); viennacl::copy(vcl_ell_matrix, ublas_matrix);// just to check that it's works std::cout << "Testing products: ell_matrix" << std::endl; result = viennacl::linalg::prod(ublas_matrix, rhs); vcl_result.clear(); vcl_result = viennacl::linalg::prod(vcl_ell_matrix, vcl_rhs); //viennacl::linalg::prod_impl(vcl_ell_matrix, vcl_rhs, vcl_result); //std::cout << vcl_result << "\n"; //std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; //std::cout << "First entry of result vector: " << vcl_result[0] << std::endl; if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with ell_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing products: ell_matrix, strided vectors" << std::endl; retval = strided_matrix_vector_product_test >(epsilon, result, rhs, vcl_result, vcl_rhs); if (retval != EXIT_SUCCESS) return retval; //std::cout << "Copying hyb_matrix" << std::endl; viennacl::copy(ublas_matrix, vcl_hyb_matrix); ublas_matrix.clear(); viennacl::copy(vcl_hyb_matrix, ublas_matrix);// just to check that it's works viennacl::copy(ublas_matrix, vcl_hyb_matrix); std::cout << "Testing products: hyb_matrix" << std::endl; result = viennacl::linalg::prod(ublas_matrix, rhs); vcl_result.clear(); vcl_result = viennacl::linalg::prod(vcl_hyb_matrix, vcl_rhs); //viennacl::linalg::prod_impl(vcl_hyb_matrix, vcl_rhs, vcl_result); //std::cout << vcl_result << "\n"; //std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; //std::cout << "First entry of result vector: " << vcl_result[0] << std::endl; if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with hyb_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout 
<< "Testing products: hyb_matrix, strided vectors" << std::endl; retval = strided_matrix_vector_product_test >(epsilon, result, rhs, vcl_result, vcl_rhs); if (retval != EXIT_SUCCESS) return retval; // -------------------------------------------------------------------------- // -------------------------------------------------------------------------- NumericT alpha = static_cast(2.786); NumericT beta = static_cast(1.432); copy(rhs.begin(), rhs.end(), vcl_rhs.begin()); copy(result.begin(), result.end(), vcl_result.begin()); copy(result.begin(), result.end(), vcl_result2.begin()); std::cout << "Testing scaled additions of products and vectors" << std::endl; result = alpha * viennacl::linalg::prod(ublas_matrix, rhs) + beta * result; vcl_result2 = alpha * viennacl::linalg::prod(vcl_compressed_matrix, vcl_rhs) + beta * vcl_result; if( std::fabs(diff(result, vcl_result2)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product (compressed_matrix) with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result2)) << std::endl; retval = EXIT_FAILURE; } vcl_result2.clear(); vcl_result2 = alpha * viennacl::linalg::prod(vcl_coordinate_matrix, vcl_rhs) + beta * vcl_result; if( std::fabs(diff(result, vcl_result2)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product (coordinate_matrix) with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result2)) << std::endl; retval = EXIT_FAILURE; } vcl_result2.clear(); vcl_result2 = alpha * viennacl::linalg::prod(vcl_ell_matrix, vcl_rhs) + beta * vcl_result; if( std::fabs(diff(result, vcl_result2)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product (ell_matrix) with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result2)) << std::endl; retval = EXIT_FAILURE; } vcl_result2.clear(); vcl_result2 = alpha * viennacl::linalg::prod(vcl_hyb_matrix, vcl_rhs) + beta * vcl_result; if( 
std::fabs(diff(result, vcl_result2)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product (hyb_matrix) with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result2)) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- return retval; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Sparse Matrices" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = static_cast(1E-4); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-12; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } #ifdef 
VIENNACL_WITH_OPENCL else std::cout << "No double precision support, skipping test..." << std::endl; #endif std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/matrix_row_float.cu000644 001750 001750 00000003330 12267307531 021627 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include "matrix_float_double.hpp" int main (int, const char **) { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Matrix operations, row-major, single precision " << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; double epsilon = 1e-4; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; std::cout << " --- row-major ---" << std::endl; if (run_test(epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/scheduler_sparse.cpp000644 001750 001750 00000041473 12267307531 021767 0ustar00rupprupp000000 000000 /* 
========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #ifndef NDEBUG #define NDEBUG #endif // // *** System // #include // // *** Boost // #include #include #include #include #include #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL #define VIENNACL_WITH_UBLAS 1 #include "viennacl/scalar.hpp" #include "viennacl/compressed_matrix.hpp" #include "viennacl/coordinate_matrix.hpp" #include "viennacl/ell_matrix.hpp" #include "viennacl/hyb_matrix.hpp" #include "viennacl/vector.hpp" #include "viennacl/vector_proxy.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/ilu.hpp" #include "viennacl/linalg/detail/ilu/common.hpp" #include "viennacl/io/matrix_market.hpp" #include "examples/tutorial/Random.hpp" #include "examples/tutorial/vector-io.hpp" #include "viennacl/scheduler/execute.hpp" #include "viennacl/scheduler/io.hpp" // // ------------------------------------------------------------- // using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType & s1, viennacl::scalar & s2) { if (s1 != s2) return (s1 - s2) / std::max(fabs(s1), std::fabs(s2)); return 0; } template ScalarType diff(ublas::vector & v1, viennacl::vector & v2) { ublas::vector v2_cpu(v2.size()); viennacl::backend::finish(); viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin()); for (unsigned int i=0;i 0 ) { //if (std::max( 
std::fabs(v2_cpu[i]), std::fabs(v1[i]) ) < 1e-10 ) //absolute tolerance (avoid round-off issues) // v2_cpu[i] = 0; //else v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); } else v2_cpu[i] = 0.0; if (v2_cpu[i] > 0.0001) { //std::cout << "Neighbor: " << i-1 << ": " << v1[i-1] << " vs. " << v2_cpu[i-1] << std::endl; std::cout << "Error at entry " << i << ": " << v1[i] << " vs. " << v2_cpu[i] << std::endl; //std::cout << "Neighbor: " << i+1 << ": " << v1[i+1] << " vs. " << v2_cpu[i+1] << std::endl; exit(EXIT_FAILURE); } } return norm_inf(v2_cpu); } template ScalarType diff(ublas::compressed_matrix & cpu_matrix, VCL_MATRIX & gpu_matrix) { typedef ublas::compressed_matrix CPU_MATRIX; CPU_MATRIX from_gpu; viennacl::backend::finish(); viennacl::copy(gpu_matrix, from_gpu); ScalarType error = 0; //step 1: compare all entries from cpu_matrix with gpu_matrix: //std::cout << "Ublas matrix: " << std::endl; for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) { //std::cout << "Row " << row_it.index1() << ": " << std::endl; for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) { //std::cout << "(" << col_it.index2() << ", " << *col_it << std::endl; ScalarType current_error = 0; if ( std::max( std::fabs(cpu_matrix(col_it.index1(), col_it.index2())), std::fabs(from_gpu(col_it.index1(), col_it.index2())) ) > 0 ) current_error = std::fabs(cpu_matrix(col_it.index1(), col_it.index2()) - from_gpu(col_it.index1(), col_it.index2())) / std::max( std::fabs(cpu_matrix(col_it.index1(), col_it.index2())), std::fabs(from_gpu(col_it.index1(), col_it.index2())) ); if (current_error > error) error = current_error; } } //step 2: compare all entries from gpu_matrix with cpu_matrix (sparsity pattern might differ): //std::cout << "ViennaCL matrix: " << std::endl; for (typename CPU_MATRIX::const_iterator1 row_it = from_gpu.begin1(); row_it != 
from_gpu.end1(); ++row_it) { //std::cout << "Row " << row_it.index1() << ": " << std::endl; for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) { //std::cout << "(" << col_it.index2() << ", " << *col_it << std::endl; ScalarType current_error = 0; if ( std::max( std::fabs(cpu_matrix(col_it.index1(), col_it.index2())), std::fabs(from_gpu(col_it.index1(), col_it.index2())) ) > 0 ) current_error = std::fabs(cpu_matrix(col_it.index1(), col_it.index2()) - from_gpu(col_it.index1(), col_it.index2())) / std::max( std::fabs(cpu_matrix(col_it.index1(), col_it.index2())), std::fabs(from_gpu(col_it.index1(), col_it.index2())) ); if (current_error > error) error = current_error; } } return error; } // // ------------------------------------------------------------- // template< typename NumericT, typename Epsilon > int test(Epsilon const& epsilon) { int retval = EXIT_SUCCESS; // -------------------------------------------------------------------------- NumericT alpha = static_cast(2.786); NumericT beta = static_cast(1.432); ublas::vector rhs; ublas::vector result; ublas::compressed_matrix ublas_matrix; if (viennacl::io::read_matrix_market_file(ublas_matrix, "../../examples/testdata/mat65k.mtx") == EXIT_FAILURE) { std::cout << "Error reading Matrix file" << std::endl; return EXIT_FAILURE; } //unsigned int cg_mat_size = cg_mat.size(); std::cout << "done reading matrix" << std::endl; rhs.resize(ublas_matrix.size2()); for (std::size_t i=0; i(); } result = rhs; viennacl::vector vcl_rhs(rhs.size()); viennacl::vector vcl_result(result.size()); viennacl::vector vcl_result2(result.size()); viennacl::compressed_matrix vcl_compressed_matrix(rhs.size(), rhs.size()); viennacl::coordinate_matrix vcl_coordinate_matrix(rhs.size(), rhs.size()); viennacl::ell_matrix vcl_ell_matrix; viennacl::hyb_matrix vcl_hyb_matrix; viennacl::copy(rhs.begin(), rhs.end(), vcl_rhs.begin()); viennacl::copy(ublas_matrix, vcl_compressed_matrix); 
viennacl::copy(ublas_matrix, vcl_coordinate_matrix); // -------------------------------------------------------------------------- std::cout << "Testing products: compressed_matrix" << std::endl; result = viennacl::linalg::prod(ublas_matrix, rhs); { viennacl::scheduler::statement my_statement(vcl_result, viennacl::op_assign(), viennacl::linalg::prod(vcl_compressed_matrix, vcl_rhs)); viennacl::scheduler::execute(my_statement); } vcl_result = viennacl::linalg::prod(vcl_compressed_matrix, vcl_rhs); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with compressed_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing products: coordinate_matrix" << std::endl; rhs *= NumericT(1.1); vcl_rhs *= NumericT(1.1); result = viennacl::linalg::prod(ublas_matrix, rhs); { viennacl::scheduler::statement my_statement(vcl_result, viennacl::op_assign(), viennacl::linalg::prod(vcl_coordinate_matrix, vcl_rhs)); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with coordinate_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } result = alpha * viennacl::linalg::prod(ublas_matrix, rhs) + beta * result; { viennacl::scheduler::statement my_statement(vcl_result2, viennacl::op_assign(), alpha * viennacl::linalg::prod(vcl_coordinate_matrix, vcl_rhs) + beta * vcl_result); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(result, vcl_result2)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product (coordinate_matrix) with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result2)) << std::endl; retval = EXIT_FAILURE; } //std::cout << "Copying ell_matrix" << std::endl; viennacl::copy(ublas_matrix, 
vcl_ell_matrix); ublas_matrix.clear(); viennacl::copy(vcl_ell_matrix, ublas_matrix);// just to check that it's works std::cout << "Testing products: ell_matrix" << std::endl; rhs *= NumericT(1.1); vcl_rhs *= NumericT(1.1); result = viennacl::linalg::prod(ublas_matrix, rhs); { //viennacl::scheduler::statement my_statement(vcl_result, viennacl::op_assign(), viennacl::linalg::prod(vcl_ell_matrix, vcl_rhs)); //viennacl::scheduler::execute(my_statement); } vcl_result = viennacl::linalg::prod(vcl_ell_matrix, vcl_rhs); if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with ell_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } //std::cout << "Copying hyb_matrix" << std::endl; viennacl::copy(ublas_matrix, vcl_hyb_matrix); ublas_matrix.clear(); viennacl::copy(vcl_hyb_matrix, ublas_matrix);// just to check that it's works viennacl::copy(ublas_matrix, vcl_hyb_matrix); std::cout << "Testing products: hyb_matrix" << std::endl; rhs *= NumericT(1.1); vcl_rhs *= NumericT(1.1); result = viennacl::linalg::prod(ublas_matrix, rhs); { viennacl::scheduler::statement my_statement(vcl_result, viennacl::op_assign(), viennacl::linalg::prod(vcl_hyb_matrix, vcl_rhs)); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(result, vcl_result)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with hyb_matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- // -------------------------------------------------------------------------- copy(rhs.begin(), rhs.end(), vcl_rhs.begin()); copy(result.begin(), result.end(), vcl_result.begin()); copy(result.begin(), result.end(), vcl_result2.begin()); copy(ublas_matrix, vcl_compressed_matrix); copy(ublas_matrix, vcl_coordinate_matrix); 
copy(ublas_matrix, vcl_ell_matrix); copy(ublas_matrix, vcl_hyb_matrix); std::cout << "Testing scaled additions of products and vectors: compressed_matrix" << std::endl; rhs *= NumericT(1.1); vcl_rhs *= NumericT(1.1); result = alpha * viennacl::linalg::prod(ublas_matrix, rhs) + beta * result; { viennacl::scheduler::statement my_statement(vcl_result2, viennacl::op_assign(), alpha * viennacl::linalg::prod(vcl_compressed_matrix, vcl_rhs) + beta * vcl_result); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(result, vcl_result2)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product (compressed_matrix) with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result2)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing scaled additions of products and vectors: coordinate_matrix" << std::endl; copy(result.begin(), result.end(), vcl_result.begin()); rhs *= NumericT(1.1); vcl_rhs *= NumericT(1.1); result = alpha * viennacl::linalg::prod(ublas_matrix, rhs) + beta * result; { viennacl::scheduler::statement my_statement(vcl_result2, viennacl::op_assign(), alpha * viennacl::linalg::prod(vcl_coordinate_matrix, vcl_rhs) + beta * vcl_result); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(result, vcl_result2)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product (coordinate_matrix) with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result2)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing scaled additions of products and vectors: ell_matrix" << std::endl; copy(result.begin(), result.end(), vcl_result.begin()); rhs *= NumericT(1.1); vcl_rhs *= NumericT(1.1); result = alpha * viennacl::linalg::prod(ublas_matrix, rhs) + beta * result; { viennacl::scheduler::statement my_statement(vcl_result2, viennacl::op_assign(), alpha * viennacl::linalg::prod(vcl_ell_matrix, vcl_rhs) + beta * vcl_result); 
viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(result, vcl_result2)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product (ell_matrix) with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result2)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Testing scaled additions of products and vectors: hyb_matrix" << std::endl; copy(result.begin(), result.end(), vcl_result.begin()); rhs *= NumericT(1.1); vcl_rhs *= NumericT(1.1); result = alpha * viennacl::linalg::prod(ublas_matrix, rhs) + beta * result; { viennacl::scheduler::statement my_statement(vcl_result2, viennacl::op_assign(), alpha * viennacl::linalg::prod(vcl_hyb_matrix, vcl_rhs) + beta * vcl_result); viennacl::scheduler::execute(my_statement); } if( std::fabs(diff(result, vcl_result2)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product (hyb_matrix) with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(result, vcl_result2)) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- return retval; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Sparse Matrices" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = static_cast(1E-4); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; retval 
= test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-13; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } #ifdef VIENNACL_WITH_OPENCL else std::cout << "No double precision support, skipping test..." << std::endl; #endif std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/svd.cpp000644 001750 001750 00000021154 12267307531 017222 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include #include #include #include #include #include "viennacl/matrix.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/svd.hpp" #include "examples/benchmarks/benchmark-utils.hpp" void read_matrix_size(std::fstream& f, std::size_t & sz1, std::size_t & sz2) { if(!f.is_open()) throw std::invalid_argument("File is not opened"); f >> sz1 >> sz2; } template void read_matrix_body(std::fstream& f, viennacl::matrix& A) { if(!f.is_open()) throw std::invalid_argument("File is not opened"); boost::numeric::ublas::matrix h_A(A.size1(), A.size2()); for(std::size_t i = 0; i < h_A.size1(); i++) { for(std::size_t j = 0; j < h_A.size2(); j++) { ScalarType val = 0.0; f >> val; h_A(i, j) = val; } } viennacl::copy(h_A, A); } template void read_vector_body(std::fstream& f, std::vector& v) { if(!f.is_open()) throw std::invalid_argument("File is not opened"); for(std::size_t i = 0; i < v.size(); i++) { ScalarType val = 0.0; f >> val; v[i] = val; } } template void random_fill(std::vector& in) { for(std::size_t i = 0; i < in.size(); i++) in[i] = static_cast(rand()) / RAND_MAX; } template bool check_bidiag(viennacl::matrix& A) { const ScalarType EPS = 0.0001f; std::vector aA(A.size1() * A.size2()); viennacl::fast_copy(A, &aA[0]); for(std::size_t i = 0; i < A.size1(); i++) { for(std::size_t j = 0; j < A.size2(); j++) { ScalarType val = aA[i * A.size2() + j]; if((fabs(val) > EPS) && (i != j) && ((i + 1) != j)) { std::cout << "Failed at " << i << " " << j << " " << val << std::endl; return false; } } } return true; } template ScalarType matrix_compare(viennacl::matrix& res, viennacl::matrix& ref) { std::vector res_std(res.internal_size()); std::vector 
ref_std(ref.internal_size()); viennacl::fast_copy(res, &res_std[0]); viennacl::fast_copy(ref, &ref_std[0]); ScalarType diff = 0.0; ScalarType mx = 0.0; for(std::size_t i = 0; i < res_std.size(); i++) { diff = std::max(diff, std::abs(res_std[i] - ref_std[i])); mx = std::max(mx, res_std[i]); } return diff / mx; } template ScalarType sigmas_compare(viennacl::matrix& res, std::vector& ref) { std::vector res_std(ref.size()); for(std::size_t i = 0; i < ref.size(); i++) res_std[i] = res(i, i); std::sort(ref.begin(), ref.end()); std::sort(res_std.begin(), res_std.end()); ScalarType diff = 0.0; ScalarType mx = 0.0; for(std::size_t i = 0; i < ref.size(); i++) { diff = std::max(diff, std::abs(res_std[i] - ref[i])); mx = std::max(mx, res_std[i]); } return diff / mx; } template void test_svd(const std::string & fn, ScalarType EPS) { std::size_t sz1, sz2; //read matrix // sz1 = 2048, sz2 = 2048; // std::vector in(sz1 * sz2); // random_fill(in); // read file std::fstream f(fn.c_str(), std::fstream::in); //read size of input matrix read_matrix_size(f, sz1, sz2); std::size_t to = std::min(sz1, sz2); viennacl::matrix Ai(sz1, sz2), Aref(sz1, sz2), QL(sz1, sz1), QR(sz2, sz2); read_matrix_body(f, Ai); std::vector sigma_ref(to); read_vector_body(f, sigma_ref); f.close(); // viennacl::fast_copy(&in[0], &in[0] + in.size(), Ai); Aref = Ai; Timer timer; timer.start(); viennacl::linalg::svd(Ai, QL, QR); viennacl::backend::finish(); double time_spend = timer.get(); viennacl::matrix result1(sz1, sz2), result2(sz1, sz2); result1 = viennacl::linalg::prod(QL, Ai); result2 = viennacl::linalg::prod(result1, trans(QR)); ScalarType sigma_diff = sigmas_compare(Ai, sigma_ref); ScalarType prods_diff = matrix_compare(result2, Aref); bool sigma_ok = (fabs(sigma_diff) < EPS) && (fabs(prods_diff) < std::sqrt(EPS)); //note: computing the product is not accurate down to 10^{-16}, so we allow for a higher tolerance here printf("%6s [%dx%d] %40s sigma_diff = %.6f; prod_diff = %.6f; time = %.6f\n", 
sigma_ok?"[[OK]]":"[FAIL]", (int)Aref.size1(), (int)Aref.size2(), fn.c_str(), sigma_diff, prods_diff, time_spend); } template void time_svd(std::size_t sz1, std::size_t sz2) { viennacl::matrix Ai(sz1, sz2), QL(sz1, sz1), QR(sz2, sz2); std::vector in(Ai.internal_size1() * Ai.internal_size2()); random_fill(in); viennacl::fast_copy(&in[0], &in[0] + in.size(), Ai); Timer timer; timer.start(); viennacl::linalg::svd(Ai, QL, QR); viennacl::backend::finish(); double time_spend = timer.get(); printf("[%dx%d] time = %.6f\n", static_cast(sz1), static_cast(sz2), time_spend); } template int test(ScalarType epsilon) { test_svd(std::string("../../examples/testdata/svd/qr.example"), epsilon); test_svd(std::string("../../examples/testdata/svd/wiki.example"), epsilon); test_svd(std::string("../../examples/testdata/svd/wiki.qr.example"), epsilon); test_svd(std::string("../../examples/testdata/svd/pysvd.example"), epsilon); test_svd(std::string("../../examples/testdata/svd/random.example"), epsilon); time_svd(500, 500); time_svd(1000, 1000); time_svd(4096, 512); time_svd(2048, 2048); //time_svd(4096, 4096); //takes too long for a standard sanity test. 
Feel free to uncomment return EXIT_SUCCESS; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: BLAS 3 routines" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = NumericT(1.0E-4); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; if( viennacl::ocl::current_device().double_support() ) { { typedef double NumericT; NumericT epsilon = 1.0E-6; //Note: higher accuracy not possible, because data only available with floating point precision std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/spmdm.cpp000644 001750 001750 00000027115 12267307531 017551 0ustar00rupprupp000000 000000 // // include necessary system 
headers // #include #include // // ublas includes // #include #include #include #include #include #include #include #include #include // Must be set if you want to use ViennaCL algorithms on ublas objects #define VIENNACL_WITH_UBLAS 1 //#define VIENNACL_WITH_OPENCL 1 //#define VIENNACL_WITH_CUDA 1 //#define VIENNACL_DEBUG_KERNEL 1 //#define VIENNACL_BUILD_INFO 1 // // ViennaCL includes // #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/compressed_matrix.hpp" #include "viennacl/coordinate_matrix.hpp" #include "viennacl/ell_matrix.hpp" #include "viennacl/hyb_matrix.hpp" #include "viennacl/linalg/prod.hpp" //generic matrix-vector product #include "viennacl/linalg/norm_2.hpp" //generic l2-norm for vectors #include "viennacl/io/matrix_market.hpp" // Some helper functions for this tutorial: #include "Random.hpp" using namespace boost::numeric; template < typename ScalarType > int check_matrices(const ublas::matrix< ScalarType >& ref_mat, const ublas::matrix< ScalarType >& mat, ScalarType eps) { std::size_t size1, size2; size1 = ref_mat.size1(); size2 = ref_mat.size2(); if( (size1 != mat.size1()) || (size2 != mat.size2()) ) return EXIT_FAILURE; for (unsigned int i = 0; i < size1; i++) for (unsigned int j = 0; j < size2; j++) { ScalarType rel_error = std::abs(ref_mat(i,j) - mat(i,j)) / std::max(std::abs(ref_mat(i,j)), std::abs(mat(i,j))); if ( rel_error > eps ) { std::cout << "ERROR: Verification failed at (" << i <<", "<< j << "): " << " Expected: " << ref_mat(i,j) << ", got: " << mat(i,j) << " (relative error: " << rel_error << ")" << std::endl; return EXIT_FAILURE; } } std::cout << "Everything went well!" 
<< std::endl; return EXIT_SUCCESS; } template int test(NumericT epsilon) { int retVal = EXIT_SUCCESS; ublas::compressed_matrix ublas_lhs; if (viennacl::io::read_matrix_market_file(ublas_lhs, "../../examples/testdata/mat65k.mtx") == EXIT_FAILURE) { std::cout << "Error reading Matrix file" << std::endl; return EXIT_FAILURE; } // add some extra weight to diagonal in order to avoid issues with round-off errors for (std::size_t i=0; i compressed_lhs; viennacl::ell_matrix ell_lhs; viennacl::coordinate_matrix coo_lhs; viennacl::hyb_matrix hyb_lhs; ublas::matrix ublas_result; viennacl::matrix result; viennacl::copy( ublas_lhs, compressed_lhs); viennacl::copy( ublas_lhs, ell_lhs); viennacl::copy( ublas_lhs, coo_lhs); viennacl::copy( ublas_lhs, hyb_lhs); ublas::matrix ublas_rhs1(ublas_lhs.size2(), cols_rhs); viennacl::matrix rhs1(ublas_lhs.size2(), cols_rhs); ublas::matrix ublas_rhs2; viennacl::matrix rhs2; ublas::matrix temp(ublas_rhs1.size1(), cols_rhs); for (unsigned int i = 0; i < ublas_rhs1.size1(); i++) for (unsigned int j = 0; j < ublas_rhs1.size2(); j++) ublas_rhs1(i,j) = NumericT(0.5) + NumericT(0.1) * random(); viennacl::copy( ublas_rhs1, rhs1); ublas_rhs2 = ublas::trans( ublas_rhs1); viennacl::copy( ublas_rhs2, rhs2); /* gold result */ ublas_result = ublas::prod( ublas_lhs, ublas_rhs1); /******************************************************************/ std::cout << "Testing compressed(CSR) lhs * dense rhs" << std::endl; result = viennacl::linalg::prod( compressed_lhs, rhs1); temp.clear(); viennacl::copy( result, temp); retVal = check_matrices(ublas_result, temp, epsilon); /******************************************************************/ std::cout << "Testing compressed(ELL) lhs * dense rhs" << std::endl; result.clear(); result = viennacl::linalg::prod( ell_lhs, rhs1); temp.clear(); viennacl::copy( result, temp); check_matrices(ublas_result, temp, epsilon); /******************************************************************/ std::cout << "Testing 
compressed(COO) lhs * dense rhs" << std::endl; result.clear(); result = viennacl::linalg::prod( coo_lhs, rhs1); temp.clear(); viennacl::copy( result, temp); check_matrices(ublas_result, temp, epsilon); /******************************************************************/ std::cout << "Testing compressed(HYB) lhs * dense rhs" << std::endl; result.clear(); result = viennacl::linalg::prod( hyb_lhs, rhs1); temp.clear(); viennacl::copy( result, temp); check_matrices(ublas_result, temp, epsilon); /******************************************************************/ /* gold result */ ublas_result = ublas::prod( ublas_lhs, ublas::trans(ublas_rhs2)); /******************************************************************/ std::cout << std::endl << "Testing compressed(CSR) lhs * transposed dense rhs:" << std::endl; result.clear(); result = viennacl::linalg::prod( compressed_lhs, viennacl::trans(rhs2)); temp.clear(); viennacl::copy( result, temp); retVal = check_matrices(ublas_result, temp, epsilon); /******************************************************************/ std::cout << "Testing compressed(ELL) lhs * transposed dense rhs" << std::endl; result.clear(); result = viennacl::linalg::prod( ell_lhs, viennacl::trans(rhs2)); temp.clear(); viennacl::copy( result, temp); check_matrices(ublas_result, temp, epsilon); /******************************************************************/ std::cout << "Testing compressed(COO) lhs * transposed dense rhs" << std::endl; result.clear(); result = viennacl::linalg::prod( coo_lhs, viennacl::trans(rhs2)); temp.clear(); viennacl::copy( result, temp); check_matrices(ublas_result, temp, epsilon); /******************************************************************/ std::cout << "Testing compressed(HYB) lhs * dense rhs" << std::endl; result.clear(); result = viennacl::linalg::prod( hyb_lhs, viennacl::trans(rhs2)); temp.clear(); viennacl::copy( result, temp); check_matrices(ublas_result, temp, epsilon); 
/******************************************************************/ if(retVal == EXIT_SUCCESS) { std::cout << "Tests passed successfully" << std::endl; } return retVal; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Sparse-Dense Matrix Multiplication" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = static_cast(1E-4); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; std::cout << " layout: row-major, row-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; std::cout << " layout: row-major, column-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; std::cout << " layout: column-major, row-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; std::cout << " 
layout: column-major, column-major" << std::endl;
    retval = test(epsilon);
    if( retval == EXIT_SUCCESS )
      std::cout << "# Test passed" << std::endl;
    else
      return retval;

  }
  std::cout << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << std::endl;

  // Double precision runs. In OpenCL builds they are skipped (with a message)
  // when the current device reports no double support; without OpenCL the
  // block below is executed unconditionally.
#ifdef VIENNACL_WITH_OPENCL
  if( viennacl::ocl::current_device().double_support() )
#endif
  {
    {
      typedef double NumericT;
      NumericT epsilon = 1.0E-12;
      std::cout << "# Testing setup:" << std::endl;
      std::cout << " eps: " << epsilon << std::endl;
      std::cout << " numeric: double" << std::endl;
      std::cout << " layout: row-major, row-major" << std::endl;
      retval = test(epsilon);
      if( retval == EXIT_SUCCESS )
        std::cout << "# Test passed" << std::endl;
      else
        return retval;

      std::cout << "# Testing setup:" << std::endl;
      std::cout << " eps: " << epsilon << std::endl;
      std::cout << " numeric: double" << std::endl;
      std::cout << " layout: row-major, column-major" << std::endl;
      retval = test(epsilon);
      if( retval == EXIT_SUCCESS )
        std::cout << "# Test passed" << std::endl;
      else
        return retval;

      std::cout << "# Testing setup:" << std::endl;
      std::cout << " eps: " << epsilon << std::endl;
      std::cout << " numeric: double" << std::endl;
      std::cout << " layout: column-major, row-major" << std::endl;
      retval = test(epsilon);
      if( retval == EXIT_SUCCESS )
        std::cout << "# Test passed" << std::endl;
      else
        return retval;

      std::cout << "# Testing setup:" << std::endl;
      std::cout << " eps: " << epsilon << std::endl;
      std::cout << " numeric: double" << std::endl;
      std::cout << " layout: column-major, column-major" << std::endl;
      retval = test(epsilon);
      if( retval == EXIT_SUCCESS )
        std::cout << "# Test passed" << std::endl;
      else
        return retval;
    }
    std::cout << std::endl;
    std::cout << "----------------------------------------------" << std::endl;
    std::cout << std::endl;
  }
#ifdef VIENNACL_WITH_OPENCL
  else
    std::cout << "No double precision support, skipping test..." << std::endl;
#endif

  std::cout << std::endl;
  std::cout << "------- Test completed --------" << std::endl;
  std::cout << std::endl;

  return retval;
}
ViennaCL-1.5.1-src/tests/src/vector_float_double.hpp000644 001750 001750 00000173202 12267307531 022456 0ustar00rupprupp000000 000000
/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

//
// *** System
//
// NOTE(review): the header names of the bare "#include" lines in this file
// are missing in this copy -- restore them from the upstream sources.
#include
#include
#include

// We don't need debug mode in UBLAS:
#define BOOST_UBLAS_NDEBUG

//
// *** Boost
//
#include
#include
#include

//
// *** ViennaCL
//
//#define VIENNACL_DEBUG_ALL
#define VIENNACL_WITH_UBLAS 1
#include "viennacl/vector.hpp"
#include "viennacl/vector_proxy.hpp"
#include "viennacl/linalg/inner_prod.hpp"
#include "viennacl/linalg/norm_1.hpp"
#include "viennacl/linalg/norm_2.hpp"
#include "viennacl/linalg/norm_inf.hpp"

#include "Random.hpp"

using namespace boost::numeric;


//
// -------------------------------------------------------------
//
// Signed relative difference of two host scalars:
// (s1 - s2) / max(|s1|, |s2|), or 0 if both compare equal.
// viennacl::backend::finish() is called before the comparison.
// NOTE(review): the template parameter list after "template" is missing in
// this copy of the file.
template
ScalarType diff(ScalarType const & s1, ScalarType const & s2)
{
  viennacl::backend::finish();
  if (s1 != s2)
    return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2));
  return 0;
}
//
// -------------------------------------------------------------
//
// Same signed relative difference, with the second operand given as a
// viennacl::scalar.
// NOTE(review): the template arguments of "template" and "viennacl::scalar"
// are missing in this copy of the file.
template
ScalarType diff(ScalarType const & s1, viennacl::scalar const & s2)
{
  viennacl::backend::finish();
  if (s1 != s2)
    return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2));
  return 0;
}
//
// -------------------------------------------------------------
//
template
ScalarType diff(ScalarType const & s1, viennacl::entry_proxy const & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } // // ------------------------------------------------------------- // template ScalarType diff(ublas::vector const & v1, ViennaCLVectorType const & vcl_vec) { ublas::vector v2_cpu(vcl_vec.size()); viennacl::backend::finish(); viennacl::copy(vcl_vec, v2_cpu); for (unsigned int i=0;i 0 ) v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); else v2_cpu[i] = 0.0; } return ublas::norm_inf(v2_cpu); } template int check(T1 const & t1, T2 const & t2, double epsilon) { int retval = EXIT_SUCCESS; double temp = std::fabs(diff(t1, t2)); if (temp > epsilon) { std::cout << "# Error! Relative difference: " << temp << std::endl; retval = EXIT_FAILURE; } return retval; } // // ------------------------------------------------------------- // template< typename NumericT, typename Epsilon, typename UblasVectorType, typename ViennaCLVectorType1, typename ViennaCLVectorType2 > int test(Epsilon const& epsilon, UblasVectorType & ublas_v1, UblasVectorType & ublas_v2, ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2) { int retval = EXIT_SUCCESS; NumericT cpu_result = 42.0; viennacl::scalar gpu_result = 43.0; // // Initializer: // std::cout << "Checking for zero_vector initializer..." << std::endl; ublas_v1 = ublas::zero_vector(ublas_v1.size()); vcl_v1 = viennacl::zero_vector(vcl_v1.size()); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Checking for scalar_vector initializer..." 
<< std::endl; ublas_v1 = ublas::scalar_vector(ublas_v1.size(), cpu_result); vcl_v1 = viennacl::scalar_vector(vcl_v1.size(), cpu_result); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 = ublas::scalar_vector(ublas_v1.size(), gpu_result); vcl_v1 = viennacl::scalar_vector(vcl_v1.size(), gpu_result); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Checking for unit_vector initializer..." << std::endl; ublas_v1 = ublas::unit_vector(ublas_v1.size(), 5); vcl_v1 = viennacl::unit_vector(vcl_v1.size(), 5); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; for (std::size_t i=0; i(); ublas_v2[i] = NumericT(1.0) + random(); } viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); //resync viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Checking for successful copy..." << std::endl; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_v2, vcl_v2, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // Part 1: Norms and inner product // // -------------------------------------------------------------------------- std::cout << "Testing inner_prod..." 
<< std::endl; cpu_result = viennacl::linalg::inner_prod(ublas_v1, ublas_v2); NumericT cpu_result2 = viennacl::linalg::inner_prod(vcl_v1, vcl_v2); gpu_result = viennacl::linalg::inner_prod(vcl_v1, vcl_v2); if (check(cpu_result, cpu_result2, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_result = inner_prod(ublas_v1 + ublas_v2, ublas_v2 - ublas_v1); NumericT cpu_result3 = viennacl::linalg::inner_prod(vcl_v1 + vcl_v2, vcl_v2 - vcl_v1); gpu_result = viennacl::linalg::inner_prod(vcl_v1 + vcl_v2, vcl_v2 - vcl_v1); if (check(cpu_result, cpu_result3, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing norm_1..." << std::endl; cpu_result = ublas::norm_1(ublas_v1); gpu_result = viennacl::linalg::norm_1(vcl_v1); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; gpu_result = 2 * cpu_result; //reset gpu_result = ublas::norm_1(ublas_v1); cpu_result = viennacl::linalg::norm_1(vcl_v1); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; gpu_result = ublas::norm_1(ublas_v1 + ublas_v2); cpu_result = viennacl::linalg::norm_1(vcl_v1 + vcl_v2); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing norm_2..." 
<< std::endl; cpu_result = ublas::norm_2(ublas_v1); gpu_result = viennacl::linalg::norm_2(vcl_v1); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; gpu_result = 2 * cpu_result; //reset gpu_result = ublas::norm_2(ublas_v1); cpu_result = viennacl::linalg::norm_2(vcl_v1); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; gpu_result = ublas::norm_2(ublas_v1 + ublas_v2); cpu_result = viennacl::linalg::norm_2(vcl_v1 + vcl_v2); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing norm_inf..." << std::endl; cpu_result = ublas::norm_inf(ublas_v1); gpu_result = viennacl::linalg::norm_inf(vcl_v1); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; gpu_result = 2 * cpu_result; //reset gpu_result = ublas::norm_inf(ublas_v1); cpu_result = viennacl::linalg::norm_inf(vcl_v1); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; gpu_result = ublas::norm_inf(ublas_v1 + ublas_v2); cpu_result = viennacl::linalg::norm_inf(vcl_v1 + vcl_v2); if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing index_norm_inf..." 
<< std::endl; std::size_t cpu_index = ublas::index_norm_inf(ublas_v1); std::size_t gpu_index = viennacl::linalg::index_norm_inf(vcl_v1); if (check(static_cast(cpu_index), static_cast(gpu_index), epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- cpu_result = ublas_v1[index_norm_inf(ublas_v1)]; gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1)]; if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_result = ublas_v1[index_norm_inf(ublas_v1 + ublas_v2)]; gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1 + vcl_v2)]; if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // Plane rotation and assignments // // -------------------------------------------------------------------------- ublas::vector x = ublas_v1; ublas::vector y = ublas_v2; ublas::vector t = ublas_v1; t.assign (NumericT(1.1) * x + NumericT(2.3) * y), y.assign (- NumericT(2.3) * x + NumericT(1.1) * y), x.assign (t); viennacl::linalg::plane_rotation(vcl_v1, vcl_v2, NumericT(1.1), NumericT(2.3)); if (check(x, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(y, vcl_v2, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing assignments..." << std::endl; NumericT val = static_cast(1e-1); for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = val; for (size_t i=0; i < vcl_v1.size(); ++i) vcl_v1(i) = val; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // multiplication and division of vectors by scalars // for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(1.0) + random(); ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); //resync viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing scaling with CPU scalar..." 
<< std::endl; NumericT alpha = static_cast(1.7182); viennacl::scalar gpu_alpha = alpha; ublas_v1 *= alpha; vcl_v1 *= alpha; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing scaling with GPU scalar..." << std::endl; ublas_v1 *= alpha; vcl_v1 *= gpu_alpha; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing scaling with scalar expression..." << std::endl; ublas_v1 *= inner_prod(ublas_v1, ublas_v2); vcl_v1 *= viennacl::linalg::inner_prod(vcl_v1, vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; NumericT beta = static_cast(1.4153); viennacl::scalar gpu_beta = beta; std::cout << "Testing shrinking with CPU scalar..." << std::endl; ublas_v1 /= beta; vcl_v1 /= beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing shrinking with GPU scalar..." << std::endl; ublas_v1 /= beta; vcl_v1 /= gpu_beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // add and inplace_add of vectors // for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(1.0) + random(); ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); //resync viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing add on vector..." << std::endl; std::cout << "Checking for successful copy..." << std::endl; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_v2, vcl_v2, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 = ublas_v1 + ublas_v2; vcl_v1 = vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing add on vector with flipsign..." 
<< std::endl; ublas_v1 = - ublas_v1 + ublas_v2; vcl_v1 = - vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace-add on vector..." << std::endl; ublas_v1 += ublas_v2; vcl_v1 += vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing assignment to vector with vector multiplied by scalar expression..." << std::endl; ublas_v1 = inner_prod(ublas_v1, ublas_v2) * ublas_v2; vcl_v1 = viennacl::linalg::inner_prod(vcl_v1, vcl_v2) * vcl_v2; // // subtract and inplace_subtract of vectors // std::cout << "Testing sub on vector..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - ublas_v2; vcl_v1 = vcl_v1 - vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace-sub on vector..." << std::endl; ublas_v1 -= ublas_v2; vcl_v1 -= vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // multiply-add // std::cout << "Testing multiply-add on vector with CPU scalar (right)..." << std::endl; for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(1.0) + random(); ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + alpha * ublas_v2; vcl_v1 = vcl_v1 + alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with CPU scalar (left)..." 
<< std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 + ublas_v2; vcl_v1 = alpha * vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with CPU scalar (both)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 + beta * ublas_v2; vcl_v1 = alpha * vcl_v1 + beta * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with CPU scalar..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v2; vcl_v1 += alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (right)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + alpha * ublas_v2; vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (left)..." 
<< std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + alpha * ublas_v2; vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (both)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 + beta * ublas_v2; vcl_v1 = gpu_alpha * vcl_v1 + gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v1 + beta * ublas_v2; vcl_v1 += gpu_alpha * vcl_v1 + gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, subtracting)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v1 - beta * ublas_v2; vcl_v1 += gpu_alpha * vcl_v1 - gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with GPU scalar..." 
<< std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v2; vcl_v1 += gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // division-add // std::cout << "Testing division-add on vector with CPU scalar (right)..." << std::endl; for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(1.0) + random(); ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + ublas_v2 / alpha; vcl_v1 = vcl_v1 + vcl_v2 / alpha; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with CPU scalar (left)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha + ublas_v2; vcl_v1 = vcl_v1 / alpha + vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with CPU scalar (both)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha + ublas_v2 / beta; vcl_v1 = vcl_v1 / alpha + vcl_v2 / beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-multiply-add on vector with CPU scalar..." 
<< std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha + ublas_v2 * beta; vcl_v1 = vcl_v1 / alpha + vcl_v2 * beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-division-add on vector with CPU scalar..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 * alpha + ublas_v2 / beta; vcl_v1 = vcl_v1 * alpha + vcl_v2 / beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with CPU scalar..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v2 / alpha; vcl_v1 += vcl_v2 / alpha; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with GPU scalar (right)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + ublas_v2 / alpha; vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with GPU scalar (left)..." 
<< std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + ublas_v2 / alpha; vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with GPU scalar (both)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha + ublas_v2 / beta; vcl_v1 = vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with GPU scalar (both, adding)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v1 / alpha + ublas_v2 / beta; vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with GPU scalar (both, subtracting)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v1 / alpha - ublas_v2 / beta; vcl_v1 += vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-multiply-add on vector with GPU scalar (adding)..." 
<< std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v1 / alpha + ublas_v2 * beta; vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-division-add on vector with GPU scalar (subtracting)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v1 * alpha - ublas_v2 / beta; vcl_v1 += vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with GPU scalar..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v2 * alpha; vcl_v1 += vcl_v2 * gpu_alpha; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // multiply-subtract // std::cout << "Testing multiply-subtract on vector with CPU scalar (right)..." << std::endl; for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(1.0) + random(); ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - alpha * ublas_v2; vcl_v1 = vcl_v1 - alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-subtract on vector with CPU scalar (left)..." 
<< std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 - ublas_v2; vcl_v1 = alpha * vcl_v1 - vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-subtract on vector with CPU scalar (both)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 - beta * ublas_v2; vcl_v1 = alpha * vcl_v1 - beta * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-subtract on vector with CPU scalar..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= alpha * ublas_v2; vcl_v1 -= alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-subtract on vector with GPU scalar (right)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - alpha * ublas_v2; vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-subtract on vector with GPU scalar (left)..." 
<< std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - alpha * ublas_v2; vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-subtract on vector with GPU scalar (both)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 - beta * ublas_v2; vcl_v1 = gpu_alpha * vcl_v1 - gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-subtract on vector with GPU scalar (both, adding)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= alpha * ublas_v1 + beta * ublas_v2; vcl_v1 -= gpu_alpha * vcl_v1 + gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-subtract on vector with GPU scalar (both, subtracting)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= alpha * ublas_v1 - beta * ublas_v2; vcl_v1 -= gpu_alpha * vcl_v1 - gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-subtract on vector with GPU scalar..." 
<< std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= alpha * ublas_v2; vcl_v1 -= gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // division-subtract // std::cout << "Testing division-subtract on vector with CPU scalar (right)..." << std::endl; for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(1.0) + random(); ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - ublas_v2 / alpha; vcl_v1 = vcl_v1 - vcl_v2 / alpha; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-subtract on vector with CPU scalar (left)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha - ublas_v2; vcl_v1 = vcl_v1 / alpha - vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-subtract on vector with CPU scalar (both)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha - ublas_v2 / alpha; vcl_v1 = vcl_v1 / alpha - vcl_v2 / alpha; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-subtract on vector with CPU scalar..." 
<< std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v2 / alpha; vcl_v1 -= vcl_v2 / alpha; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-subtract on vector with GPU scalar..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v2 / alpha; vcl_v1 -= vcl_v2 / gpu_alpha; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-subtract on vector with GPU scalar (right)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - ublas_v2 / alpha; vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-subtract on vector with GPU scalar (left)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 - ublas_v2 / alpha; vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-subtract on vector with GPU scalar (both)..." 
<< std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha - ublas_v2 / beta; vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-subtract on vector with GPU scalar (both, adding)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v1 / alpha + ublas_v2 / beta; vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-subtract on vector with GPU scalar (both, subtracting)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v1 / alpha - ublas_v2 / beta; vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-division-subtract on vector with GPU scalar..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 * alpha - ublas_v2 / beta; vcl_v1 = vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-multiply-subtract on vector with GPU scalar..." 
<< std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha - ublas_v2 * beta; vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-division-subtract on vector with GPU scalar (adding)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v1 * alpha + ublas_v2 / beta; vcl_v1 -= vcl_v1 * gpu_alpha + vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-multiply-subtract on vector with GPU scalar (adding)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v1 / alpha + ublas_v2 * beta; vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-division-subtract on vector with GPU scalar (subtracting)..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v1 * alpha - ublas_v2 / beta; vcl_v1 -= vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-multiply-subtract on vector with GPU scalar (subtracting)..." 
<< std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= ublas_v1 / alpha - ublas_v2 * beta; vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-subtract on vector with GPU scalar..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 -= alpha * ublas_v2; vcl_v1 -= gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // More complicated expressions (for ensuring the operator overloads work correctly) // for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(1.0) + random(); ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing three vector additions..." << std::endl; ublas_v1 = ublas_v2 + ublas_v1 + ublas_v2; vcl_v1 = vcl_v2 + vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing complicated vector expression with CPU scalar..." << std::endl; ublas_v1 = beta * (ublas_v1 - alpha * ublas_v2); vcl_v1 = beta * (vcl_v1 - alpha * vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing complicated vector expression with GPU scalar..." 
<< std::endl; ublas_v1 = beta * (ublas_v1 - alpha * ublas_v2); vcl_v1 = gpu_beta * (vcl_v1 - gpu_alpha * vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing swap..." << std::endl; swap(ublas_v1, ublas_v2); swap(vcl_v1, vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- for (std::size_t i=0; i(); ublas_v2[i] = NumericT(5.0) + random(); } viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing unary operator-..." << std::endl; ublas_v1 = - ublas_v2; vcl_v1 = - vcl_v2; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing elementwise multiplication..." 
<< std::endl; std::cout << " v1 = element_prod(v1, v2);" << std::endl; ublas_v1 = ublas::element_prod(ublas_v1, ublas_v2); vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " v1 += element_prod(v1, v2);" << std::endl; ublas_v1 += ublas::element_prod(ublas_v1, ublas_v2); vcl_v1 += viennacl::linalg::element_prod(vcl_v1, vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " v1 -= element_prod(v1, v2);" << std::endl; ublas_v1 -= ublas::element_prod(ublas_v1, ublas_v2); vcl_v1 -= viennacl::linalg::element_prod(vcl_v1, vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " v1 = element_prod(v1 + v2, v2);" << std::endl; ublas_v1 = ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2); vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " v1 += element_prod(v1 + v2, v2);" << std::endl; ublas_v1 += ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2); vcl_v1 += viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " v1 -= element_prod(v1 + v2, v2);" << std::endl; ublas_v1 -= ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2); vcl_v1 -= viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " v1 = element_prod(v1, v2 + v1);" << std::endl; ublas_v1 = ublas::element_prod(ublas_v1, ublas_v2 + ublas_v1); vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " v1 += element_prod(v1, v2 + v1);" << std::endl; ublas_v1 += ublas::element_prod(ublas_v1, ublas_v2 + ublas_v1); vcl_v1 += 
viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " v1 -= element_prod(v1, v2 + v1);" << std::endl; ublas_v1 -= ublas::element_prod(ublas_v1, ublas_v2 + ublas_v1); vcl_v1 -= viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " v1 = element_prod(v1 + v2, v2 + v1);" << std::endl; ublas_v1 = ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2 + ublas_v1); vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " v1 += element_prod(v1 + v2, v2 + v1);" << std::endl; ublas_v1 += ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2 + ublas_v1); vcl_v1 += viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " v1 -= element_prod(v1 + v2, v2 + v1);" << std::endl; ublas_v1 -= ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2 + ublas_v1); vcl_v1 -= viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing elementwise division..." 
<< std::endl; for (std::size_t i=0; i(); ublas_v2[i] = NumericT(5.0) + random(); } viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas::element_div(ublas_v1, ublas_v2); vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 += ublas::element_div(ublas_v1, ublas_v2); vcl_v1 += viennacl::linalg::element_div(vcl_v1, vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 -= ublas::element_div(ublas_v1, ublas_v2); vcl_v1 -= viennacl::linalg::element_div(vcl_v1, vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; /////// ublas_v1 = ublas::element_div(ublas_v1 + ublas_v2, ublas_v2); vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 += ublas::element_div(ublas_v1 + ublas_v2, ublas_v2); vcl_v1 += viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 -= ublas::element_div(ublas_v1 + ublas_v2, ublas_v2); vcl_v1 -= viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; /////// ublas_v1 = ublas::element_div(ublas_v1, ublas_v2 + ublas_v1); vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 += ublas::element_div(ublas_v1, ublas_v2 + ublas_v1); vcl_v1 += viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 -= ublas::element_div(ublas_v1, ublas_v2 + ublas_v1); vcl_v1 -= viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return 
EXIT_FAILURE; /////// ublas_v1 = ublas::element_div(ublas_v1 + ublas_v2, ublas_v2 + ublas_v1); vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 += ublas::element_div(ublas_v1 + ublas_v2, ublas_v2 + ublas_v1); vcl_v1 += viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 -= ublas::element_div(ublas_v1 + ublas_v2, ublas_v2 + ublas_v1); vcl_v1 -= viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing elementwise power function..." << std::endl; for (std::size_t i=0; i(); ublas_v2[i] = NumericT(5.0) + random(); } UblasVectorType ublas_v3 = ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); for (std::size_t i=0; i() / NumericT(4); #define GENERATE_UNARY_OP_TEST(FUNCNAME) \ ublas_v2 = NumericT(3.1415) * ublas_v1; \ viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); \ viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); \ \ for (std::size_t i=0; i() / NumericT(4); GENERATE_UNARY_OP_TEST(exp); GENERATE_UNARY_OP_TEST(floor); GENERATE_UNARY_OP_TEST(fabs); GENERATE_UNARY_OP_TEST(log); GENERATE_UNARY_OP_TEST(log10); GENERATE_UNARY_OP_TEST(sin); GENERATE_UNARY_OP_TEST(sinh); GENERATE_UNARY_OP_TEST(fabs); //GENERATE_UNARY_OP_TEST(abs); //OpenCL allows abs on integers only GENERATE_UNARY_OP_TEST(sqrt); GENERATE_UNARY_OP_TEST(tan); GENERATE_UNARY_OP_TEST(tanh); // -------------------------------------------------------------------------- ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing another complicated vector 
expression with CPU scalars..." << std::endl; ublas_v1 = ublas_v2 / alpha + beta * (ublas_v1 - alpha*ublas_v2); vcl_v1 = vcl_v2 / alpha + beta * (vcl_v1 - alpha*vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing another complicated vector expression with GPU scalars..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v2 / alpha + beta * (ublas_v1 - alpha*ublas_v2); vcl_v1 = vcl_v2 / gpu_alpha + gpu_beta * (vcl_v1 - gpu_alpha*vcl_v2); if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing lenghty sum of scaled vectors..." << std::endl; ublas_v2 = NumericT(3.1415) * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v2 / alpha + beta * ublas_v1 - alpha * ublas_v2 + beta * ublas_v1 - alpha * ublas_v1; vcl_v1 = vcl_v2 / gpu_alpha + gpu_beta * vcl_v1 - alpha * vcl_v2 + beta * vcl_v1 - alpha * vcl_v1; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- return retval; } template< typename NumericT, typename Epsilon > int test(Epsilon const& epsilon) { int retval = EXIT_SUCCESS; std::size_t size = 24656; std::cout << "Running tests for vector of size " << size << std::endl; // // Set up UBLAS objects // ublas::vector ublas_full_vec(size); ublas::vector ublas_full_vec2(ublas_full_vec.size()); for (std::size_t i=0; i(); ublas_full_vec2[i] = NumericT(1.0) + random(); } ublas::range r1( ublas_full_vec.size() / 4, 2 * ublas_full_vec.size() / 4); ublas::range r2(2 * ublas_full_vec2.size() / 4, 3 * ublas_full_vec2.size() / 4); ublas::vector_range< ublas::vector > ublas_range_vec(ublas_full_vec, r1); ublas::vector_range< 
ublas::vector > ublas_range_vec2(ublas_full_vec2, r2); ublas::slice s1( ublas_full_vec.size() / 4, 3, ublas_full_vec.size() / 4); ublas::slice s2(2 * ublas_full_vec2.size() / 4, 2, ublas_full_vec2.size() / 4); ublas::vector_slice< ublas::vector > ublas_slice_vec(ublas_full_vec, s1); ublas::vector_slice< ublas::vector > ublas_slice_vec2(ublas_full_vec2, s2); // // Set up ViennaCL objects // viennacl::vector vcl_full_vec(ublas_full_vec.size()); viennacl::vector vcl_full_vec2(ublas_full_vec2.size()); viennacl::fast_copy(ublas_full_vec.begin(), ublas_full_vec.end(), vcl_full_vec.begin()); viennacl::copy(ublas_full_vec2.begin(), ublas_full_vec2.end(), vcl_full_vec2.begin()); viennacl::range vcl_r1( vcl_full_vec.size() / 4, 2 * vcl_full_vec.size() / 4); viennacl::range vcl_r2(2 * vcl_full_vec2.size() / 4, 3 * vcl_full_vec2.size() / 4); viennacl::vector_range< viennacl::vector > vcl_range_vec(vcl_full_vec, vcl_r1); viennacl::vector_range< viennacl::vector > vcl_range_vec2(vcl_full_vec2, vcl_r2); { viennacl::vector vcl_short_vec(vcl_range_vec); viennacl::vector vcl_short_vec2 = vcl_range_vec2; ublas::vector ublas_short_vec(ublas_range_vec); ublas::vector ublas_short_vec2(ublas_range_vec2); std::cout << "Testing creation of vectors from range..." 
<< std::endl; if (check(ublas_short_vec, vcl_short_vec, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_short_vec2, vcl_short_vec2, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; } viennacl::slice vcl_s1( vcl_full_vec.size() / 4, 3, vcl_full_vec.size() / 4); viennacl::slice vcl_s2(2 * vcl_full_vec2.size() / 4, 2, vcl_full_vec2.size() / 4); viennacl::vector_slice< viennacl::vector > vcl_slice_vec(vcl_full_vec, vcl_s1); viennacl::vector_slice< viennacl::vector > vcl_slice_vec2(vcl_full_vec2, vcl_s2); viennacl::vector vcl_short_vec(vcl_slice_vec); viennacl::vector vcl_short_vec2 = vcl_slice_vec2; ublas::vector ublas_short_vec(ublas_slice_vec); ublas::vector ublas_short_vec2(ublas_slice_vec2); std::cout << "Testing creation of vectors from slice..." << std::endl; if (check(ublas_short_vec, vcl_short_vec, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_short_vec2, vcl_short_vec2, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // Now start running tests for vectors, ranges and slices: // std::cout << " ** vcl_v1 = vector, vcl_v2 = vector **" << std::endl; retval = test(epsilon, ublas_short_vec, ublas_short_vec2, vcl_short_vec, vcl_short_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = vector, vcl_v2 = range **" << std::endl; retval = test(epsilon, ublas_short_vec, ublas_short_vec2, vcl_short_vec, vcl_range_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = vector, vcl_v2 = slice **" << std::endl; retval = test(epsilon, ublas_short_vec, ublas_short_vec2, vcl_short_vec, vcl_slice_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " ** vcl_v1 = range, vcl_v2 = vector **" << std::endl; retval = test(epsilon, ublas_short_vec, ublas_short_vec2, vcl_range_vec, vcl_short_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = range, vcl_v2 = range **" << std::endl; retval = test(epsilon, ublas_short_vec, ublas_short_vec2, 
vcl_range_vec, vcl_range_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = range, vcl_v2 = slice **" << std::endl; retval = test(epsilon, ublas_short_vec, ublas_short_vec2, vcl_range_vec, vcl_slice_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " ** vcl_v1 = slice, vcl_v2 = vector **" << std::endl; retval = test(epsilon, ublas_short_vec, ublas_short_vec2, vcl_slice_vec, vcl_short_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = slice, vcl_v2 = range **" << std::endl; retval = test(epsilon, ublas_short_vec, ublas_short_vec2, vcl_slice_vec, vcl_range_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = slice, vcl_v2 = slice **" << std::endl; retval = test(epsilon, ublas_short_vec, ublas_short_vec2, vcl_slice_vec, vcl_slice_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/generator_blas1.cpp000644 001750 001750 00000042773 12267307531 021510 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2012, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // *** System // #include // // *** Boost // #include #include // // *** ViennaCL // #define VIENNACL_WITH_UBLAS 1 #define VIENNACL_DEBUG_ALL #define VIENNACL_DEBUG_BUILD #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/inner_prod.hpp" #include "viennacl/linalg/norm_1.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/norm_inf.hpp" #include "viennacl/generator/generate.hpp" #include "viennacl/scheduler/io.hpp" #define CHECK_RESULT(cpu,gpu, op) \ if ( float delta = std::fabs ( diff ( cpu, gpu) ) > epsilon ) {\ std::cout << "# Error at operation: " #op << std::endl;\ std::cout << " diff: " << delta << std::endl;\ retval = EXIT_FAILURE;\ }\ using namespace boost::numeric; using namespace viennacl; template ScalarType diff(ublas::matrix & mat1, VCLMatrixType & mat2) { ublas::matrix mat2_cpu(mat2.size1(), mat2.size2()); viennacl::backend::finish(); viennacl::copy(mat2, mat2_cpu); double ret = 0; double act = 0; for (unsigned int i = 0; i < mat2_cpu.size1(); ++i) { for (unsigned int j = 0; j < mat2_cpu.size2(); ++j) { act = std::fabs(mat2_cpu(i,j) - mat1(i,j)) / std::max( std::fabs(mat2_cpu(i, j)), std::fabs(mat1(i,j)) ); if (act > ret) ret = act; } } //std::cout << ret << std::endl; return ret; } template ScalarType diff ( ublas::vector & v1, viennacl::vector & v2 ) { ublas::vector v2_cpu ( v2.size() ); viennacl::copy( v2.begin(), v2.end(), v2_cpu.begin() ); for ( unsigned int i=0; i 0 ) v2_cpu[i] = std::fabs ( v2_cpu[i] - v1[i] ) / std::max ( std::fabs ( v2_cpu[i] ), std::fabs ( v1[i] ) ); else v2_cpu[i] = 0.0; } return norm_inf ( v2_cpu ); } template ScalarType diff(ScalarType s, 
viennacl::scalar & gs){ ScalarType other = gs; return (s - other) / std::max(s, other); } template< typename NumericT, typename Epsilon > int test_vector ( Epsilon const& epsilon) { int retval = EXIT_SUCCESS; unsigned int size = 1024; ublas::vector cw(size); ublas::vector cx(size); ublas::vector cy(size); ublas::vector cz(size); NumericT s; for(unsigned int i=0; i w (size); viennacl::vector x (size); viennacl::vector y (size); viennacl::vector z (size); viennacl::scalar gs(0); cx = NumericT(2.0f)*cw; cy = NumericT(3.0f)*cw; cz = NumericT(4.0f)*cw; viennacl::copy (cw, w); viennacl::copy (cx, x); viennacl::copy (cy, y); viennacl::copy (cz, z); NumericT alpha = NumericT(3.14); NumericT beta = NumericT(3.51); // -------------------------------------------------------------------------- { std::cout << "w = x + y ..." << std::endl; cw = cx + cy; viennacl::scheduler::statement statement(w, viennacl::op_assign(), x + y); generator::generate_enqueue_statement(statement, statement.array()[0]); viennacl::backend::finish(); CHECK_RESULT(cw, w, w = x + y); } { std::cout << "y = w + x ..." << std::endl; cy = cw + cx; viennacl::scheduler::statement statement(y, viennacl::op_assign(), w + x); generator::generate_enqueue_statement(statement, statement.array()[0]); viennacl::backend::finish(); CHECK_RESULT(cy, y, y = w + x); } { std::cout << "x = y + w ..." << std::endl; cx = cy + cw; viennacl::scheduler::statement statement(x, viennacl::op_assign(), y + w); generator::generate_enqueue_statement(statement, statement.array()[0]); viennacl::backend::finish(); CHECK_RESULT(cx, x, x = y + w); } { std::cout << "w = alpha*x + beta*y ..." << std::endl; cw = alpha*cx + beta*cy; viennacl::scheduler::statement statement(w, viennacl::op_assign(), alpha*x + beta*y); generator::generate_enqueue_statement(statement, statement.array()[0]); viennacl::backend::finish(); CHECK_RESULT(cw, w, w = alpha*x + beta*y); } { std::cout << "s = inner_prod(x,y)..." 
<< std::endl; s = 0; for(unsigned int i=0 ; i 0.42" << std::endl; // for(unsigned int i=0 ; i < size ; ++i){ // cw(i) = cx(i) > (NumericT)0.42; // } // generator::custom_operation op; // op.add(vec(w) = vec(x) > (NumericT)0.42); // op.execute(); // viennacl::backend::finish(); // CHECK_RESULT(cw, w, w = x > 1) // } // { // std::cout << "w = -w ..." << std::endl; // cw = -cw; // generator::custom_operation op; // op.add(vec(w) = -vec(w)); // op.execute(); // viennacl::backend::finish(); // CHECK_RESULT(cw,w, w=-w); // } // { // std::cout << "w = x + shift(x,-5) + shift(x,3) ..." << std::endl; // for(unsigned int i=0 ; i(vec(x))); // op.execute(); // viennacl::backend::finish(); // CHECK_RESULT(s,gs, s=max(x)); // } // { // std::cout << "Multiline ..." << std::endl; // viennacl::generator::custom_operation op; // op.add(vec(w) = vec(x) - vec(y)); // op.add(vec(y) = element_prod(vec(w), vec(z))); // op.add(vec(z) = vec(x) + vec(z)); // op.execute(); // viennacl::backend::finish(); // for(unsigned int i=0 ; i < size ; ++i){ // cw(i) = cx(i) - cy(i); // cy(i) = cw(i)*cz(i); // cz(i) = cx(i) + cz(i); // } // CHECK_RESULT(cw, w, Multiline); // CHECK_RESULT(cy, y, Multiline); // CHECK_RESULT(cz, z, Multiline); // } return retval; } template< typename NumericT, class Layout, typename Epsilon > int test_matrix ( Epsilon const& epsilon) { int retval = EXIT_SUCCESS; unsigned int size1 = 1024; unsigned int size2 = 1024; unsigned int pattern_size1 = 256; unsigned int pattern_size2 = 128; // unsigned int n_rep1 = size1/pattern_size1; // unsigned int n_rep2 = size2/pattern_size2; ublas::matrix cA(size1,size2); ublas::matrix cB(size1,size2); ublas::matrix cC(size1,size2); ublas::matrix cPattern(pattern_size1,pattern_size2); ublas::vector cx(size1); for(unsigned int i=0; i A (size1, size2); viennacl::matrix B (size1, size2); viennacl::matrix C (size1, size2); viennacl::matrix pattern(pattern_size1, pattern_size2); viennacl::vector x(size1); cB = cA; cC = cA; viennacl::copy(cA,A); 
viennacl::copy(cB,B); viennacl::copy(cC,C); viennacl::copy(cx,x); viennacl::copy(cPattern,pattern); { std::cout << "C = A + B ..." << std::endl; cC = ( cA + cB ); viennacl::scheduler::statement statement(C, viennacl::op_assign(), A + B); generator::generate_enqueue_statement(statement, statement.array()[0]); viennacl::backend::finish(); CHECK_RESULT(cC, C, C=A+B) } // { // std::cout << "C = diag(x) ..." << std::endl; // for(unsigned int i = 0 ; i < size1 ; ++i){ // for(unsigned int j = 0 ; j < size2 ; ++j){ // cC(i,j) = (i==j)?cx[i]:0; // } // } // generator::custom_operation op; // op.add(mat(C) = generator::diag(vec(x))); // op.execute(); // viennacl::backend::finish(); // CHECK_RESULT(cC, C, C = diag(x)) // } // { // std::cout << "x = diag(C) ..." << std::endl; // for(unsigned int i = 0; i < size1 ; ++i){ // cx(i) = cA(i,i); // } // generator::custom_operation op; // op.add(vec(x) = generator::diag(mat(A))); // op.execute(); // viennacl::backend::finish(); // CHECK_RESULT(cx,x, x = diag(A)); // } // { // std::cout << "C = repmat(P, M, N) ..." << std::endl; // for(unsigned int i = 0 ; i < size1 ; ++i) // for(unsigned int j = 0 ; j < size2 ; ++j) // cC(i,j) = cPattern(i%pattern_size1, j%pattern_size2); // generator::custom_operation op; // op.add(mat(C) = generator::repmat(mat(pattern),n_rep1,n_rep2)); // op.execute(); // viennacl::backend::finish(); // CHECK_RESULT(cC, C, C = repmat(P, M, N)) // } // { // std::cout << "C = repmat(x, 1, N) ..." << std::endl; // for(unsigned int i = 0 ; i < size1 ; ++i) // for(unsigned int j = 0 ; j < size2 ; ++j) // cC(i,j) = cx(i); // generator::custom_operation op; // op.add(mat(C) = generator::repmat(vec(x),1, C.size2())); // op.execute(); // viennacl::backend::finish(); // CHECK_RESULT(cC, C, C = repmat(x, 1, N)) // } // { // std::cout << "C = trans(repmat(x, 1, N)) ..." 
<< std::endl; // for(unsigned int i = 0 ; i < size1 ; ++i) // for(unsigned int j = 0 ; j < size2 ; ++j) // cC(i,j) = cx(j); // generator::custom_operation op; // op.add(mat(C) = generator::trans(generator::repmat(vec(x),1,C.size2()))); // op.execute(); // viennacl::backend::finish(); // CHECK_RESULT(cC, C, C = repmat(x, 1, N)) // } // { // std::cout << "C = -A ..." << std::endl; // for(unsigned int i = 0 ; i < size1 ; ++i) // for(unsigned int j = 0 ; j < size2 ; ++j) // cC(i,j) = -cA(i,j); // generator::custom_operation op; // op.add(mat(C) = -mat(A)); // op.execute(); // viennacl::backend::finish(); // CHECK_RESULT(cC, C, C = -A) // } // { // std::cout << "C = 1/(1+EXP(-A)) ..." << std::endl; // for(unsigned int i = 0 ; i < size1 ; ++i) // for(unsigned int j = 0 ; j < size2 ; ++j) // cC(i,j) = 1.0f/(1.0f+std::exp(-cA(i,j))); // generator::custom_operation op; // op.add(mat(C) = 1.0f/(1.0f+generator::exp(-mat(A)))); // op.execute(); // viennacl::backend::finish(); // CHECK_RESULT(cC, C, C = 1/(1+EXP(-A))) // } return retval; } int main(int argc, char* argv[]){ std::vector args(argv,argv+argc); unsigned int requested_device; if(argc!=2){ requested_device=0; } else{ requested_device = atoi(args[1].c_str()); } int retval = EXIT_SUCCESS; typedef std::vector< viennacl::ocl::platform > platforms_type; typedef std::vector devices_type; platforms_type platforms = viennacl::ocl::get_platforms(); size_t num_platforms = platforms.size(); unsigned int current_device = 0; for(unsigned int k=0 ; k < num_platforms ; ++k) { viennacl::ocl::platform pf(k); viennacl::ocl::set_context_device_type(k,CL_DEVICE_TYPE_ALL); viennacl::ocl::set_context_platform_index(k,k); viennacl::ocl::switch_context(k); devices_type dev = viennacl::ocl::current_context().devices(); for(devices_type::iterator it = dev.begin() ; it != dev.end() ; ++it){ if(current_device++ == requested_device ){ viennacl::ocl::switch_device(*it); std::cout << std::endl; std::cout << 
"----------------------------------------------" << std::endl; std::cout << " Device Info" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << viennacl::ocl::current_device().info() << std::endl; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Vector" << std::endl; std::cout << "----------------------------------------------" << std::endl; { double epsilon = 1.0E-4; std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: float" << std::endl; retval = test_vector (epsilon); std::cout << std::endl; // std::cout << "# Testing setup:" << std::endl; // std::cout << " numeric: double" << std::endl; // retval = test_vector (epsilon); if ( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } // std::cout << std::endl; // std::cout << "----------------------------------------------" << std::endl; // std::cout << "----------------------------------------------" << std::endl; // std::cout << "## Test :: Matrix" << std::endl; // std::cout << "----------------------------------------------" << std::endl; // { // double epsilon = 1.0E-4; // std::cout << "# Testing setup:" << std::endl; // std::cout << " numeric: float" << std::endl; // std::cout << " --------------" << std::endl; // std::cout << " Row-Major" << std::endl; // std::cout << " --------------" << std::endl; // retval = test_matrix (epsilon); // std::cout << " --------------" << std::endl; // std::cout << " Column-Major" << std::endl; // std::cout << " --------------" << std::endl; // retval &= test_matrix (epsilon); // std::cout << " numeric: double" << std::endl; // std::cout << " --------------" << std::endl; // std::cout << " Row-Major" << std::endl; // std::cout << " --------------" << std::endl; // retval = test_matrix (epsilon); // std::cout << " 
--------------" << std::endl; // std::cout << " Column-Major" << std::endl; // std::cout << " --------------" << std::endl; // retval &= test_matrix (epsilon); // if ( retval == EXIT_SUCCESS ) // std::cout << "# Test passed" << std::endl; // else // return retval; // } } } } } ViennaCL-1.5.1-src/tests/src/blas3_prod_float_double.hpp000644 001750 001750 00000067457 12267307531 023222 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ //#define NDEBUG //#define VIENNACL_DEBUG_BUILD // We don't need debug mode in UBLAS: #define BOOST_UBLAS_NDEBUG // // *** System // #include // // *** Boost // #include #include #include #include #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL //#define VIENNACL_DEBUG_BUILD #define VIENNACL_WITH_UBLAS 1 #include "viennacl/scalar.hpp" #include "viennacl/matrix.hpp" #include "viennacl/matrix_proxy.hpp" #include "viennacl/vector.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "examples/tutorial/Random.hpp" // // ------------------------------------------------------------- // using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType & s1, viennacl::scalar & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } template 
ScalarType diff(ublas::vector & v1, viennacl::vector & v2) { ublas::vector v2_cpu(v2.size()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin()); for (std::size_t i=0;i 0 ) v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); else v2_cpu[i] = 0.0; } return norm_inf(v2_cpu); } template ScalarType diff(ublas::matrix & mat1, VCLMatrixType & mat2) { ublas::matrix mat2_cpu(mat2.size1(), mat2.size2()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(mat2, mat2_cpu); ScalarType ret = 0; ScalarType act = 0; for (unsigned int i = 0; i < mat2_cpu.size1(); ++i) { for (unsigned int j = 0; j < mat2_cpu.size2(); ++j) { act = std::fabs(mat2_cpu(i,j) - mat1(i,j)) / std::max( std::fabs(mat2_cpu(i, j)), std::fabs(mat1(i,j)) ); if (act > ret) ret = act; } } //std::cout << ret << std::endl; return ret; } // // Part 1: Matrix-matrix multiplications // template< typename NumericT, typename Epsilon, typename ReferenceMatrixTypeA, typename ReferenceMatrixTypeB, typename ReferenceMatrixTypeC, typename MatrixTypeA, typename MatrixTypeB, typename MatrixTypeC> int test_prod(Epsilon const& epsilon, ReferenceMatrixTypeA const & A, ReferenceMatrixTypeA const & A_trans, ReferenceMatrixTypeB const & B, ReferenceMatrixTypeB const & B_trans, ReferenceMatrixTypeC & C, MatrixTypeA const & vcl_A, MatrixTypeA const & vcl_A_trans, MatrixTypeB const & vcl_B, MatrixTypeB const & vcl_B_trans, MatrixTypeC & vcl_C ) { int retval = EXIT_SUCCESS; NumericT act_diff = 0; // Test: C +-= A * B -------------------------------------------------------------------------- C = viennacl::linalg::prod(A, B); vcl_C = viennacl::linalg::prod(vcl_A, vcl_B); act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " 
diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C = A * B passed!" << std::endl; C += viennacl::linalg::prod(A, B); vcl_C += viennacl::linalg::prod(vcl_A, vcl_B); act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C += A * B passed!" << std::endl; C -= viennacl::linalg::prod(A, B); vcl_C -= viennacl::linalg::prod(vcl_A, vcl_B); act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C -= A * B passed!" << std::endl; // Test: C +-= A * trans(B) -------------------------------------------------------------------------- C = boost::numeric::ublas::prod(A, trans(B_trans)); vcl_C = viennacl::linalg::prod(vcl_A, trans(vcl_B_trans)); act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C = A * trans(B) passed!" << std::endl; C += boost::numeric::ublas::prod(A, trans(B_trans)); vcl_C += viennacl::linalg::prod(vcl_A, trans(vcl_B_trans)); act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C += A * trans(B) passed!" 
<< std::endl; C -= boost::numeric::ublas::prod(A, trans(B_trans)); vcl_C -= viennacl::linalg::prod(vcl_A, trans(vcl_B_trans)); act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C -= A * trans(B) passed!" << std::endl; // Test: C +-= trans(A) * B -------------------------------------------------------------------------- C = boost::numeric::ublas::prod(trans(A_trans), B); vcl_C = viennacl::linalg::prod(trans(vcl_A_trans), vcl_B); act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C = trans(A) * B passed!" << std::endl; C += boost::numeric::ublas::prod(trans(A_trans), B); vcl_C += viennacl::linalg::prod(trans(vcl_A_trans), vcl_B); act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C += trans(A) * B passed!" << std::endl; C -= boost::numeric::ublas::prod(trans(A_trans), B); vcl_C -= viennacl::linalg::prod(trans(vcl_A_trans), vcl_B); act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C -= trans(A) * B passed!" 
<< std::endl; // Test: C +-= trans(A) * trans(B) -------------------------------------------------------------------------- C = boost::numeric::ublas::prod(trans(A_trans), trans(B_trans)); vcl_C = viennacl::linalg::prod(trans(vcl_A_trans), trans(vcl_B_trans)); act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C = trans(A) * trans(B) passed!" << std::endl; C += boost::numeric::ublas::prod(trans(A_trans), trans(B_trans)); vcl_C += viennacl::linalg::prod(trans(vcl_A_trans), trans(vcl_B_trans)); act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C += trans(A) * trans(B) passed!" << std::endl; C -= boost::numeric::ublas::prod(trans(A_trans), trans(B_trans)); vcl_C -= viennacl::linalg::prod(trans(vcl_A_trans), trans(vcl_B_trans)); act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C -= trans(A) * trans(B) passed!" 
<< std::endl; return retval; } template< typename NumericT, typename F_A, typename F_B, typename F_C, typename Epsilon > int test_prod(Epsilon const& epsilon) { int ret; long matrix_size1 = 131; //some odd number, not too large long matrix_size2 = 67; //some odd number, not too large long matrix_size3 = 73; //some odd number, not too large //long matrix_size1 = 128; //some odd number, not too large //long matrix_size2 = 64; //some odd number, not too large //long matrix_size3 = 128; //some odd number, not too large //long matrix_size1 = 256; // for testing AMD kernels //long matrix_size2 = 256; // for testing AMD kernels //long matrix_size3 = 256; // for testing AMD kernels // -------------------------------------------------------------------------- // ublas reference: ublas::matrix A(matrix_size1, matrix_size2); ublas::matrix big_A = ublas::scalar_matrix(4*matrix_size1, 4*matrix_size2, NumericT(3.1415)); ublas::matrix B(matrix_size2, matrix_size3); ublas::matrix big_B = ublas::scalar_matrix(4*matrix_size2, 4*matrix_size3, NumericT(42.0)); ublas::matrix C(matrix_size1, matrix_size3); //fill A and B: for (unsigned int i = 0; i < A.size1(); ++i) for (unsigned int j = 0; j < A.size2(); ++j) A(i,j) = static_cast(0.1) * random(); for (unsigned int i = 0; i < B.size1(); ++i) for (unsigned int j = 0; j < B.size2(); ++j) B(i,j) = static_cast(0.1) * random(); ublas::matrix A_trans = trans(A); ublas::matrix big_A_trans = trans(big_A); ublas::matrix B_trans = trans(B); ublas::matrix big_B_trans = trans(big_B); // // ViennaCL objects // // A viennacl::range range1_A(matrix_size1, 2*matrix_size1); viennacl::range range2_A(matrix_size2, 2*matrix_size2); viennacl::slice slice1_A(matrix_size1, 2, matrix_size1); viennacl::slice slice2_A(matrix_size2, 3, matrix_size2); viennacl::matrix vcl_A(matrix_size1, matrix_size2); viennacl::copy(A, vcl_A); viennacl::matrix vcl_big_range_A(4*matrix_size1, 4*matrix_size2); viennacl::matrix_range > vcl_range_A(vcl_big_range_A, range1_A, 
range2_A); viennacl::copy(A, vcl_range_A); viennacl::matrix vcl_big_slice_A(4*matrix_size1, 4*matrix_size2); viennacl::matrix_slice > vcl_slice_A(vcl_big_slice_A, slice1_A, slice2_A); viennacl::copy(A, vcl_slice_A); // A^T viennacl::matrix vcl_A_trans(matrix_size2, matrix_size1); viennacl::copy(A_trans, vcl_A_trans); viennacl::matrix vcl_big_range_A_trans(4*matrix_size2, 4*matrix_size1); viennacl::matrix_range > vcl_range_A_trans(vcl_big_range_A_trans, range2_A, range1_A); viennacl::copy(A_trans, vcl_range_A_trans); viennacl::matrix vcl_big_slice_A_trans(4*matrix_size2, 4*matrix_size1); viennacl::matrix_slice > vcl_slice_A_trans(vcl_big_slice_A_trans, slice2_A, slice1_A); viennacl::copy(A_trans, vcl_slice_A_trans); // B viennacl::range range1_B(2*matrix_size2, 3*matrix_size2); viennacl::range range2_B(2*matrix_size3, 3*matrix_size3); viennacl::slice slice1_B(matrix_size2, 3, matrix_size2); viennacl::slice slice2_B(matrix_size3, 2, matrix_size3); viennacl::matrix vcl_B(matrix_size2, matrix_size3); viennacl::copy(B, vcl_B); viennacl::matrix vcl_big_range_B(4*matrix_size2, 4*matrix_size3); viennacl::matrix_range > vcl_range_B(vcl_big_range_B, range1_B, range2_B); viennacl::copy(B, vcl_range_B); viennacl::matrix vcl_big_slice_B(4*matrix_size2, 4*matrix_size3); viennacl::matrix_slice > vcl_slice_B(vcl_big_slice_B, slice1_B, slice2_B); viennacl::copy(B, vcl_slice_B); // B^T viennacl::matrix vcl_B_trans(matrix_size3, matrix_size2); viennacl::copy(B_trans, vcl_B_trans); viennacl::matrix vcl_big_range_B_trans(4*matrix_size3, 4*matrix_size2); viennacl::matrix_range > vcl_range_B_trans(vcl_big_range_B_trans, range2_B, range1_B); viennacl::copy(B_trans, vcl_range_B_trans); viennacl::matrix vcl_big_slice_B_trans(4*matrix_size3, 4*matrix_size2); viennacl::matrix_slice > vcl_slice_B_trans(vcl_big_slice_B_trans, slice2_B, slice1_B); viennacl::copy(B_trans, vcl_slice_B_trans); // C viennacl::range range1_C(matrix_size1-1, 2*matrix_size1-1); viennacl::range range2_C(matrix_size3-1, 
2*matrix_size3-1); viennacl::slice slice1_C(matrix_size1-1, 3, matrix_size1); viennacl::slice slice2_C(matrix_size3-1, 3, matrix_size3); viennacl::matrix vcl_C(matrix_size1, matrix_size3); viennacl::matrix vcl_big_range_C(4*matrix_size1, 4*matrix_size3); viennacl::matrix_range > vcl_range_C(vcl_big_range_C, range1_C, range2_C); viennacl::matrix vcl_big_slice_C(4*matrix_size1, 4*matrix_size3); viennacl::matrix_slice > vcl_slice_C(vcl_big_slice_C, slice1_C, slice2_C); std::cout << "--- Part 1: Testing matrix-matrix products ---" << std::endl; ////// ////// A: matrix ////// // // std::cout << "Now using A=matrix, B=matrix, C=matrix" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_B, vcl_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=matrix, C=range" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_B, vcl_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=matrix, C=slice" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_B, vcl_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=range, C=matrix" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_range_B, vcl_range_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=range, C=range" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_range_B, vcl_range_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=range, C=slice" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_range_B, vcl_range_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=slice, C=matrix" << std::endl; ret = test_prod(epsilon, A, 
A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=slice, C=range" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=slice, C=slice" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; ////// ////// A: range ////// // // std::cout << "Now using A=range, B=matrix, C=matrix" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_B, vcl_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=matrix, C=range" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_B, vcl_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=matrix, C=slice" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_B, vcl_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=range, C=matrix" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_range_B, vcl_range_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=range, C=range" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_range_B, vcl_range_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=range, C=slice" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_range_B, vcl_range_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; // // 
std::cout << "Now using A=range, B=slice, C=matrix" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=slice, C=range" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=slice, C=slice" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; ////// ////// A: slice ////// // // std::cout << "Now using A=slice, B=matrix, C=matrix" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_B, vcl_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=matrix, C=range" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_B, vcl_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=matrix, C=slice" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_B, vcl_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=range, C=matrix" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_range_B, vcl_range_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=range, C=range" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_range_B, vcl_range_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=range, C=slice" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, 
C, vcl_slice_A, vcl_slice_A_trans, vcl_range_B, vcl_range_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=slice, C=matrix" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=slice, C=range" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=slice, C=slice" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; return ret; } // // Control functions // template< typename NumericT, typename Epsilon > int test(Epsilon const& epsilon) { int ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=row, C=row ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=row, C=col ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=col, C=row ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=col, C=col ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = 
test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=row, C=row ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=row, C=col ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=col, C=row ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=col, C=col ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; return ret; } ViennaCL-1.5.1-src/tests/src/fft.cpp000644 001750 001750 00000031637 12267307531 017214 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include #include #include #include #include #include //#define VIENNACL_BUILD_INFO #include "viennacl/fft.hpp" typedef float ScalarType; const ScalarType EPS = ScalarType(0.06f); //use smaller values in double precision typedef ScalarType (*test_function_ptr)(std::vector&, std::vector&, unsigned int, unsigned int, unsigned int); typedef void (*input_function_ptr)(std::istream&, std::vector&, std::vector&, unsigned int&, unsigned int&, unsigned int&); void read_vectors_pair(std::istream& str, std::vector& input, std::vector& output, unsigned int& rows, unsigned int& cols, unsigned int& batch_size) { rows = 1; str >> cols >> batch_size; input.resize(2 * cols * batch_size); output.resize(2 * cols * batch_size); for(unsigned int i = 0; i < input.size(); i++) str >> input[i]; for(unsigned int i = 0; i < output.size(); i++) str >> output[i]; } void read_matrices_pair(std::istream& str, std::vector& input, std::vector& output, unsigned int& rows, unsigned int& cols, unsigned int& batch_size) { batch_size = 1; str >> rows >> cols; input.resize(2 * rows * cols); output.resize(2 * rows * cols); for(unsigned int i = 0; i < input.size(); i++) { str >> input[i]; } for(unsigned int i = 0; i < output.size(); i++) { str >> output[i]; } } template ScalarType diff(std::vector& vec, std::vector& ref) { ScalarType df = 0.0; ScalarType norm_ref = 0; for(std::size_t i = 0; i < vec.size(); i++) { df = df + pow(vec[i] - ref[i], 2); norm_ref += ref[i] * ref[i]; } return sqrt(df / norm_ref) ; } template ScalarType diff_max(std::vector& vec, std::vector& ref) { ScalarType df = 0.0; ScalarType mx = 0.0; ScalarType norm_max = 0; for (std::size_t i = 0; i < vec.size(); 
i++) { df = std::max(fabs(vec[i] - ref[i]), df); mx = std::max(fabs(vec[i]), mx); if (mx > 0) { if (norm_max < df / mx) norm_max = df / mx; } } return norm_max; } void convolve_ref(std::vector& in1, std::vector& in2, std::vector& out) { out.resize(in1.size()); unsigned int data_size = static_cast(in1.size()) >> 1; for(unsigned int n = 0; n < data_size; n++) { std::complex el; for(unsigned int k = 0; k < data_size; k++) { int offset = (n - k); if(offset < 0) offset += data_size; std::complex m1(in1[2*k], in1[2*k + 1]); std::complex m2(in2[2*offset], in2[2*offset + 1]); // std::cout << offset << " " << m1 << " " << m2 << "\n"; el = el + m1 * m2 ; } //std::cout << "Answer - " << el << "\n"; out[2*n] = el.real(); out[2*n + 1] = el.imag(); } } ScalarType opencl_fft(std::vector& in, std::vector& out, unsigned int /*row*/, unsigned int /*col*/, unsigned int batch_size) { viennacl::vector input(in.size()); viennacl::vector output(in.size()); std::vector res(in.size()); viennacl::fast_copy(in, input); viennacl::fft(input, output, batch_size); viennacl::backend::finish(); viennacl::fast_copy(output, res); return diff_max(res, out); } ScalarType opencl_2d_fft_1arg(std::vector& in, std::vector& out, unsigned int row, unsigned int col, unsigned int /*batch_size*/) { viennacl::matrix input(row, 2 * col); std::vector res(in.size()); viennacl::fast_copy(&in[0], &in[0] + in.size(), input); //std::cout << input << "\n"; viennacl::inplace_fft(input); //std::cout << input << "\n"; viennacl::backend::finish(); viennacl::fast_copy(input, &res[0]); return diff_max(res, out); } ScalarType opencl_2d_fft_2arg(std::vector& in, std::vector& out, unsigned int row, unsigned int col, unsigned int /*batch_size*/) { viennacl::matrix input(row, 2 * col); viennacl::matrix output(row, 2 * col); std::vector res(in.size()); viennacl::fast_copy(&in[0], &in[0] + in.size(), input); //std::cout << input << "\n"; viennacl::fft(input, output); //std::cout << input << "\n"; viennacl::backend::finish(); 
viennacl::fast_copy(output, &res[0]); return diff_max(res, out); } ScalarType opencl_direct(std::vector& in, std::vector& out, unsigned int /*row*/, unsigned int /*col*/, unsigned int batch_num) { viennacl::vector input(in.size()); viennacl::vector output(in.size()); std::vector res(in.size()); viennacl::fast_copy(in, input); unsigned int size = (static_cast(input.size()) >> 1) / batch_num; viennacl::detail::fft::direct(input.handle().opencl_handle(), output.handle().opencl_handle(), size, size, batch_num); viennacl::backend::finish(); viennacl::fast_copy(output, res); return diff_max(res, out); } ScalarType opencl_bluestein(std::vector& in, std::vector& out, unsigned int /*row*/, unsigned int /*col*/, unsigned int batch_size) { viennacl::vector input(in.size()); viennacl::vector output(in.size()); std::vector res(in.size()); viennacl::fast_copy(in, input); viennacl::detail::fft::bluestein(input, output, batch_size); viennacl::backend::finish(); viennacl::fast_copy(output, res); return diff_max(res, out); } ScalarType opencl_radix2(std::vector& in, std::vector& out, unsigned int /*row*/, unsigned int /*col*/, unsigned int batch_num) { viennacl::vector input(in.size()); viennacl::vector output(in.size()); std::vector res(in.size()); viennacl::fast_copy(in, input); unsigned int size = (static_cast(input.size()) >> 1) / batch_num; viennacl::detail::fft::radix2(input.handle().opencl_handle(), size, size, batch_num); viennacl::backend::finish(); viennacl::fast_copy(input, res); return diff_max(res, out); } ScalarType opencl_convolve(std::vector& in1, std::vector& in2, unsigned int /*row*/, unsigned int /*col*/, unsigned int /*batch_size*/) { //if(in1.size() > 2048) return -1; viennacl::vector input1(in1.size()); viennacl::vector input2(in2.size()); viennacl::vector output(in1.size()); viennacl::fast_copy(in1, input1); viennacl::fast_copy(in2, input2); viennacl::linalg::convolve(input1, input2, output); viennacl::backend::finish(); std::vector res(in1.size()); 
viennacl::fast_copy(output, res); std::vector ref(in1.size()); convolve_ref(in1, in2, ref); return diff_max(res, ref); } int test_correctness(const std::string& log_tag, const std::string& filename, input_function_ptr input_function, test_function_ptr func) { std::vector input; std::vector output; std::fstream fstr; fstr.open(filename.c_str()); std::cout << "*****************" << log_tag << "***************************\n"; unsigned int test_size = 0; fstr >> test_size; std::cout << "Test size: " << test_size << std::endl; for(unsigned int i = 0; i < test_size; i++) { unsigned int batch_size; unsigned int rows_num, cols_num; input_function(fstr, input, output, rows_num, cols_num, batch_size); ScalarType df = func(input, output, rows_num, cols_num, batch_size); printf("%7s NX=%6d NY=%6d; BATCH=%3d; DIFF=%3.15f;\n", ((fabs(df) < EPS)?"[Ok]":"[Fail]"), rows_num, cols_num, batch_size, df); if (df > EPS) return EXIT_FAILURE; } return EXIT_SUCCESS; } int main() { std::cout << "*" << std::endl; std::cout << "* ViennaCL test: FFT" << std::endl; std::cout << "*" << std::endl; //1D FFT tests if (test_correctness("fft::direct", "../non-release/testdata/cufft.data", read_vectors_pair, &opencl_direct) == EXIT_FAILURE) return EXIT_FAILURE; if (test_correctness("fft::fft", "../non-release/testdata/cufft.data", read_vectors_pair, &opencl_fft) == EXIT_FAILURE) return EXIT_FAILURE; if (test_correctness("fft::batch::direct", "../non-release/testdata/batch_radix.data", read_vectors_pair, &opencl_direct) == EXIT_FAILURE) return EXIT_FAILURE; if (test_correctness("fft::radix2", "../non-release/testdata/radix2.data", read_vectors_pair, &opencl_radix2) == EXIT_FAILURE) return EXIT_FAILURE; if (test_correctness("fft::batch::radix2", "../non-release/testdata/batch_radix.data", read_vectors_pair, &opencl_radix2) == EXIT_FAILURE) return EXIT_FAILURE; if (test_correctness("fft::batch::fft", "../non-release/testdata/batch_radix.data", read_vectors_pair, &opencl_fft) == EXIT_FAILURE) return 
EXIT_FAILURE; if (test_correctness("fft::convolve::1", "../non-release/testdata/cufft.data", read_vectors_pair, &opencl_convolve) == EXIT_FAILURE) return EXIT_FAILURE; if (test_correctness("fft::convolve::2", "../non-release/testdata/radix2.data", read_vectors_pair, &opencl_convolve) == EXIT_FAILURE) return EXIT_FAILURE; if (test_correctness("fft::bluestein::1", "../non-release/testdata/cufft.data", read_vectors_pair, &opencl_bluestein) == EXIT_FAILURE) return EXIT_FAILURE; if (test_correctness("fft::bluestein::2", "../non-release/testdata/radix2.data", read_vectors_pair, &opencl_bluestein) == EXIT_FAILURE) return EXIT_FAILURE; //2D FFT tests if (test_correctness("fft:2d::radix2::sml::1_arg", "../non-release/testdata/fft2d_radix2.data", read_matrices_pair, &opencl_2d_fft_1arg) == EXIT_FAILURE) return EXIT_FAILURE; if (test_correctness("fft:2d::direct::sml::1_arg", "../non-release/testdata/fft2d_direct.data", read_matrices_pair, &opencl_2d_fft_1arg) == EXIT_FAILURE) return EXIT_FAILURE; if (test_correctness("fft:2d::direct::big::1_arg", "../non-release/testdata/fft2d_direct_big.data", read_matrices_pair, &opencl_2d_fft_1arg) == EXIT_FAILURE) return EXIT_FAILURE; if (test_correctness("fft:2d::radix2::sml::2_arg", "../non-release/testdata/fft2d_radix2.data", read_matrices_pair, &opencl_2d_fft_2arg) == EXIT_FAILURE) return EXIT_FAILURE; if (test_correctness("fft:2d::direct::sml::2_arg", "../non-release/testdata/fft2d_direct.data", read_matrices_pair, &opencl_2d_fft_2arg) == EXIT_FAILURE) return EXIT_FAILURE; if (test_correctness("fft:2d::direct::bscalarig::2_arg", "../non-release/testdata/fft2d_direct_big.data", read_matrices_pair, &opencl_2d_fft_2arg) == EXIT_FAILURE) return EXIT_FAILURE; std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/scalar.cu000644 001750 001750 00000033456 12267307531 017530 0ustar00rupprupp000000 000000 /* 
========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // *** System // #include #include #include // // *** ViennaCL // #include "viennacl/scalar.hpp" // // ------------------------------------------------------------- // template ScalarType diff(ScalarType & s1, viennacl::scalar & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } // // ------------------------------------------------------------- // template< typename NumericT, typename Epsilon > int test(Epsilon const& epsilon) { int retval = EXIT_SUCCESS; NumericT s1 = NumericT(3.1415926); NumericT s2 = NumericT(2.71763); NumericT s3 = NumericT(42); viennacl::scalar vcl_s1; viennacl::scalar vcl_s2; viennacl::scalar vcl_s3 = 1.0; vcl_s1 = s1; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: vcl_s1 = s1;" << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } vcl_s2 = s2; if( fabs(diff(s2, vcl_s2)) > epsilon ) { std::cout << "# Error at operation: vcl_s2 = s2;" << std::endl; std::cout << " diff: " << fabs(diff(s2, vcl_s2)) << std::endl; retval = EXIT_FAILURE; } vcl_s3 = s3; if( s3 != vcl_s3 ) { std::cout << "# Error at operation: vcl_s3 = s3;" << std::endl; std::cout << " diff: " << s3 - vcl_s3 << std::endl; retval = EXIT_FAILURE; } NumericT tmp = s2; s2 = s1; s1 = tmp; viennacl::linalg::swap(vcl_s1, vcl_s2); if( fabs(diff(s1, vcl_s1)) > epsilon ) 
{ std::cout << "# Error at operation: swap " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 += s2; vcl_s1 += vcl_s2; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: += " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 *= s3; vcl_s1 *= vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: *= " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 -= s2; vcl_s1 -= vcl_s2; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: -= " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 /= s3; vcl_s1 /= vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: /= " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 = vcl_s1; s1 = s2 + s3; vcl_s1 = vcl_s2 + vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 + s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 += s2 + s3; vcl_s1 += vcl_s2 + vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 += s2 + s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 -= s2 + s3; vcl_s1 -= vcl_s2 + vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 -= s2 + s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 = s2 - s3; vcl_s1 = vcl_s2 - vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 - s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 += s2 - s3; vcl_s1 += vcl_s2 - vcl_s3; if( 
fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 += s2 - s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 -= s2 - s3; vcl_s1 -= vcl_s2 - vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 -= s2 - s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 = s2 * s3; vcl_s1 = vcl_s2 * vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 * s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 += s2 * s3; vcl_s1 += vcl_s2 * vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 += s2 * s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 -= s2 * s3; vcl_s1 -= vcl_s2 * vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 -= s2 * s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 = s2 / s3; vcl_s1 = vcl_s2 / vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 / s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 += s2 / s3; vcl_s1 += vcl_s2 / vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 += s2 / s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 -= s2 / s3; vcl_s1 -= vcl_s2 / vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 -= s2 / s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } // addition with factors, = vcl_s1 = s1; s1 = s2 * s2 + s3 * s3; vcl_s1 = vcl_s2 * s2 + vcl_s3 * s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << 
"# Error at operation: s1 = s2 * s2 + s3 * s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } vcl_s1 = vcl_s2 * vcl_s2 + vcl_s3 * vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 * s2 + s3 * s3, second test " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 = s2 * s2 + s3 / s3; vcl_s1 = vcl_s2 * s2 + vcl_s3 / s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 * s2 + s3 / s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } vcl_s1 = vcl_s2 * vcl_s2 + vcl_s3 / vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 * s2 + s3 / s3, second test " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 = s2 / s2 + s3 * s3; vcl_s1 = vcl_s2 / s2 + vcl_s3 * s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 / s2 + s3 * s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } vcl_s1 = vcl_s2 / vcl_s2 + vcl_s3 * vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 / s2 + s3 * s3, second test " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 = s2 / s2 + s3 / s3; vcl_s1 = vcl_s2 / s2 + vcl_s3 / s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 / s2 + s3 / s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } vcl_s1 = vcl_s2 / vcl_s2 + vcl_s3 / vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 / s2 + s3 / s3, second test " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } // addition 
with factors, += vcl_s1 = s1; s1 += s2 * s2 + s3 * s3; vcl_s1 += vcl_s2 * s2 + vcl_s3 * s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 += s2 * s2 + s3 * s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 += s2 * s2 + s3 / s3; vcl_s1 += vcl_s2 * s2 + vcl_s3 / s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 += s2 * s2 + s3 / s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 += s2 / s2 + s3 * s3; vcl_s1 += vcl_s2 / s2 + vcl_s3 * s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 += s2 / s2 + s3 * s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 += s2 / s2 + s3 / s3; vcl_s1 += vcl_s2 / s2 + vcl_s3 / s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 += s2 / s2 + s3 / s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } // addition with factors, -= vcl_s1 = s1; s1 -= s2 * s2 + s3 * s3; vcl_s1 -= vcl_s2 * s2 + vcl_s3 * s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 -= s2 * s2 + s3 * s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 -= s2 * s2 + s3 / s3; vcl_s1 -= vcl_s2 * s2 + vcl_s3 / s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 -= s2 * s2 + s3 / s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 -= s2 / s2 + s3 * s3; vcl_s1 -= vcl_s2 / s2 + vcl_s3 * s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 -= s2 / s2 + s3 * s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 -= s2 / s2 + s3 / s3; vcl_s1 -= vcl_s2 / s2 + vcl_s3 / 
s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 -= s2 / s2 + s3 / s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } // lenghty expression: s1 = s2 + s3 * s2 - s3 / s1; vcl_s1 = vcl_s2 + vcl_s3 * vcl_s2 - vcl_s3 / vcl_s1; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: + * - / " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } return retval; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Scalar" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = NumericT(1.0E-5); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-10; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; 
} std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } // // ------------------------------------------------------------- // ViennaCL-1.5.1-src/tests/src/matrix_vector.cu000644 001750 001750 00000123423 12267307531 021143 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // *** System // #include // // *** Boost // #include #include #include #include #include #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL #define VIENNACL_WITH_UBLAS 1 #include "viennacl/scalar.hpp" #include "viennacl/matrix.hpp" #include "viennacl/vector.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/linalg/lu.hpp" #include "examples/tutorial/Random.hpp" // // ------------------------------------------------------------- // using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType & s1, viennacl::scalar & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } template ScalarType diff(ublas::vector const & v1, VCLVectorType const & v2) { ublas::vector v2_cpu(v2.size()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) 
viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin()); for (unsigned int i=0;i 0 ) v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); else v2_cpu[i] = 0.0; } return norm_inf(v2_cpu); } template ScalarType diff(ublas::matrix const & mat1, VCLMatrixType const & mat2) { ublas::matrix mat2_cpu(mat2.size1(), mat2.size2()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(mat2, mat2_cpu); ScalarType ret = 0; ScalarType act = 0; for (unsigned int i = 0; i < mat2_cpu.size1(); ++i) { for (unsigned int j = 0; j < mat2_cpu.size2(); ++j) { act = std::fabs(mat2_cpu(i,j) - mat1(i,j)) / std::max( std::fabs(mat2_cpu(i, j)), std::fabs(mat1(i,j)) ); if (act > ret) ret = act; } } //std::cout << ret << std::endl; return ret; } // // ------------------------------------------------------------- // template int test_prod_rank1(Epsilon const & epsilon, UblasMatrixType & ublas_m1, UblasVectorType & ublas_v1, UblasVectorType & ublas_v2, UblasMatrixType & ublas_m2, VCLMatrixType & vcl_m1, VCLVectorType1 & vcl_v1, VCLVectorType2 & vcl_v2, VCLMatrixType & vcl_m2) { int retval = EXIT_SUCCESS; // sync data: ublas_v1 = ublas::scalar_vector(ublas_v1.size(), NumericT(0.1234)); ublas_v2 = ublas::scalar_vector(ublas_v2.size(), NumericT(0.4321)); viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); viennacl::copy(ublas_m1, vcl_m1); // -------------------------------------------------------------------------- std::cout << "Rank 1 update" << std::endl; ublas_m1 += ublas::outer_prod(ublas_v1, ublas_v2); vcl_m1 += viennacl::linalg::outer_prod(vcl_v1, vcl_v2); if( std::fabs(diff(ublas_m1, vcl_m1)) > epsilon ) { std::cout << "# Error at operation: rank 1 update" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_m1, vcl_m1)) << std::endl; return EXIT_FAILURE; } // 
-------------------------------------------------------------------------- std::cout << "Scaled rank 1 update - CPU Scalar" << std::endl; ublas_m1 += NumericT(4.2) * ublas::outer_prod(ublas_v1, ublas_v2); vcl_m1 += NumericT(2.1) * viennacl::linalg::outer_prod(vcl_v1, vcl_v2); vcl_m1 += viennacl::linalg::outer_prod(vcl_v1, vcl_v2) * NumericT(2.1); //check proper compilation if( std::fabs(diff(ublas_m1, vcl_m1)) > epsilon ) { std::cout << "# Error at operation: scaled rank 1 update - CPU Scalar" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_m1, vcl_m1)) << std::endl; return EXIT_FAILURE; } // -------------------------------------------------------------------------- std::cout << "Scaled rank 1 update - GPU Scalar" << std::endl; ublas_m1 += NumericT(4.2) * ublas::outer_prod(ublas_v1, ublas_v2); vcl_m1 += viennacl::scalar(NumericT(2.1)) * viennacl::linalg::outer_prod(vcl_v1, vcl_v2); vcl_m1 += viennacl::linalg::outer_prod(vcl_v1, vcl_v2) * viennacl::scalar(NumericT(2.1)); //check proper compilation if( std::fabs(diff(ublas_m1, vcl_m1)) > epsilon ) { std::cout << "# Error at operation: scaled rank 1 update - GPU Scalar" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_m1, vcl_m1)) << std::endl; return EXIT_FAILURE; } //reset vcl_matrix: viennacl::copy(ublas_m1, vcl_m1); // -------------------------------------------------------------------------- std::cout << "Matrix-Vector product" << std::endl; ublas_v1 = viennacl::linalg::prod(ublas_m1, ublas_v2); vcl_v1 = viennacl::linalg::prod(vcl_m1, vcl_v2); if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- std::cout << "Matrix-Vector product with scaled add" << std::endl; NumericT alpha = static_cast(2.786); NumericT beta = static_cast(1.432); 
viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * viennacl::linalg::prod(ublas_m1, ublas_v2) + beta * ublas_v1; vcl_v1 = alpha * viennacl::linalg::prod(vcl_m1, vcl_v2) + beta * vcl_v1; if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: matrix-vector product with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Transposed Matrix-Vector product" << std::endl; ublas_v2 = alpha * viennacl::linalg::prod(trans(ublas_m1), ublas_v1); vcl_v2 = alpha * viennacl::linalg::prod(trans(vcl_m1), vcl_v1); if( std::fabs(diff(ublas_v2, vcl_v2)) > epsilon ) { std::cout << "# Error at operation: transposed matrix-vector product" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v2, vcl_v2)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Transposed Matrix-Vector product with scaled add" << std::endl; ublas_v2 = alpha * viennacl::linalg::prod(trans(ublas_m1), ublas_v1) + beta * ublas_v2; vcl_v2 = alpha * viennacl::linalg::prod(trans(vcl_m1), vcl_v1) + beta * vcl_v2; if( std::fabs(diff(ublas_v2, vcl_v2)) > epsilon ) { std::cout << "# Error at operation: transposed matrix-vector product with scaled additions" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v2, vcl_v2)) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Row extraction from matrix" << std::endl; ublas_v2 = row(ublas_m1, std::size_t(7)); vcl_v2 = 
row(vcl_m1, std::size_t(7)); if( std::fabs(diff(ublas_v2, vcl_v2)) > epsilon ) { std::cout << "# Error at operation: diagonal extraction from matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v2, vcl_v2)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Column extraction from matrix" << std::endl; ublas_v1 = column(ublas_m1, std::size_t(7)); vcl_v1 = column(vcl_m1, std::size_t(7)); if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: diagonal extraction from matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v2, vcl_v2)) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); viennacl::copy(ublas_m2, vcl_m2); UblasMatrixType A = ublas_m2; std::cout << "Diagonal extraction from matrix" << std::endl; for (std::size_t i=0; i(-3)); if( std::fabs(diff(ublas_v2, vcl_v2)) > epsilon ) { std::cout << "# Error at operation: diagonal extraction from matrix" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v2, vcl_v2)) << std::endl; retval = EXIT_FAILURE; } std::cout << "Matrix diagonal assignment from vector" << std::endl; A = ublas::scalar_matrix(A.size1(), A.size2(), NumericT(0)); for (std::size_t i=0; i(ublas_m1.size2()) - static_cast(A.size1())); if( std::fabs(diff(A, vcl_m2)) > epsilon ) { std::cout << "# Error at operation: Matrix assignment from diagonal" << std::endl; std::cout << " diff: " << std::fabs(diff(A, vcl_m2)) << std::endl; retval = EXIT_FAILURE; } // -------------------------------------------------------------------------- return retval; } template int test_solve(Epsilon const & epsilon, UblasMatrixType & ublas_m1, UblasVectorType & ublas_v1, VCLMatrixType & vcl_m1, VCLVectorType1 & vcl_v1) { int retval = EXIT_SUCCESS; // sync data: //viennacl::copy(ublas_v1.begin(), 
ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v1, vcl_v1); viennacl::copy(ublas_m1, vcl_m1); /////////////////// test direct solvers //////////////////////////// //upper triangular: std::cout << "Upper triangular solver" << std::endl; ublas_v1 = ublas::solve(ublas_m1, ublas_v1, ublas::upper_tag()); vcl_v1 = viennacl::linalg::solve(vcl_m1, vcl_v1, viennacl::linalg::upper_tag()); if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: upper triangular solver" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } //upper unit triangular: std::cout << "Upper unit triangular solver" << std::endl; viennacl::copy(ublas_v1, vcl_v1); ublas_v1 = ublas::solve(ublas_m1, ublas_v1, ublas::unit_upper_tag()); vcl_v1 = viennacl::linalg::solve(vcl_m1, vcl_v1, viennacl::linalg::unit_upper_tag()); if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: unit upper triangular solver" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } //lower triangular: std::cout << "Lower triangular solver" << std::endl; viennacl::copy(ublas_v1, vcl_v1); ublas_v1 = ublas::solve(ublas_m1, ublas_v1, ublas::lower_tag()); vcl_v1 = viennacl::linalg::solve(vcl_m1, vcl_v1, viennacl::linalg::lower_tag()); if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: lower triangular solver" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } //lower unit triangular: std::cout << "Lower unit triangular solver" << std::endl; viennacl::copy(ublas_v1, vcl_v1); ublas_v1 = ublas::solve(ublas_m1, ublas_v1, ublas::unit_lower_tag()); vcl_v1 = viennacl::linalg::solve(vcl_m1, vcl_v1, viennacl::linalg::unit_lower_tag()); if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: unit lower triangular solver" << 
std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } //transposed upper triangular: std::cout << "Transposed upper triangular solver" << std::endl; viennacl::copy(ublas_v1, vcl_v1); ublas_v1 = ublas::solve(trans(ublas_m1), ublas_v1, ublas::upper_tag()); vcl_v1 = viennacl::linalg::solve(trans(vcl_m1), vcl_v1, viennacl::linalg::upper_tag()); if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: upper triangular solver" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } //transposed upper unit triangular: std::cout << "Transposed unit upper triangular solver" << std::endl; viennacl::copy(ublas_v1, vcl_v1); ublas_v1 = ublas::solve(trans(ublas_m1), ublas_v1, ublas::unit_upper_tag()); vcl_v1 = viennacl::linalg::solve(trans(vcl_m1), vcl_v1, viennacl::linalg::unit_upper_tag()); if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: unit upper triangular solver" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } //transposed lower triangular: std::cout << "Transposed lower triangular solver" << std::endl; viennacl::copy(ublas_v1, vcl_v1); ublas_v1 = ublas::solve(trans(ublas_m1), ublas_v1, ublas::lower_tag()); vcl_v1 = viennacl::linalg::solve(trans(vcl_m1), vcl_v1, viennacl::linalg::lower_tag()); if( std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: lower triangular solver" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } //transposed lower unit triangular: std::cout << "Transposed unit lower triangular solver" << std::endl; viennacl::copy(ublas_v1, vcl_v1); ublas_v1 = ublas::solve(trans(ublas_m1), ublas_v1, ublas::unit_lower_tag()); vcl_v1 = viennacl::linalg::solve(trans(vcl_m1), vcl_v1, viennacl::linalg::unit_lower_tag()); if( 
std::fabs(diff(ublas_v1, vcl_v1)) > epsilon ) { std::cout << "# Error at operation: unit lower triangular solver" << std::endl; std::cout << " diff: " << std::fabs(diff(ublas_v1, vcl_v1)) << std::endl; retval = EXIT_FAILURE; } return retval; } // // ------------------------------------------------------------- // template< typename NumericT, typename F, typename Epsilon > int test(Epsilon const& epsilon) { int retval = EXIT_SUCCESS; std::size_t num_rows = 141; //note: use num_rows > num_cols + 3 for diag() tests to work std::size_t num_cols = 103; // -------------------------------------------------------------------------- ublas::vector ublas_v1(num_rows); for (std::size_t i = 0; i < ublas_v1.size(); ++i) ublas_v1(i) = random(); ublas::vector ublas_v2 = ublas::scalar_vector(num_cols, NumericT(3.1415)); ublas::matrix ublas_m1(ublas_v1.size(), ublas_v2.size()); for (std::size_t i = 0; i < ublas_m1.size1(); ++i) for (std::size_t j = 0; j < ublas_m1.size2(); ++j) ublas_m1(i,j) = static_cast(0.1) * random(); ublas::matrix ublas_m2(ublas_v1.size(), ublas_v1.size()); for (std::size_t i = 0; i < ublas_m2.size1(); ++i) { for (std::size_t j = 0; j < ublas_m2.size2(); ++j) ublas_m2(i,j) = static_cast(-0.1) * random(); ublas_m2(i, i) = static_cast(2) + random(); } viennacl::vector vcl_v1_native(ublas_v1.size()); viennacl::vector vcl_v1_large(4 * ublas_v1.size()); viennacl::vector_range< viennacl::vector > vcl_v1_range(vcl_v1_large, viennacl::range(3, ublas_v1.size() + 3)); viennacl::vector_slice< viennacl::vector > vcl_v1_slice(vcl_v1_large, viennacl::slice(2, 3, ublas_v1.size())); viennacl::vector vcl_v2_native(ublas_v2.size()); viennacl::vector vcl_v2_large(4 * ublas_v2.size()); viennacl::vector_range< viennacl::vector > vcl_v2_range(vcl_v2_large, viennacl::range(8, ublas_v2.size() + 8)); viennacl::vector_slice< viennacl::vector > vcl_v2_slice(vcl_v2_large, viennacl::slice(6, 2, ublas_v2.size())); viennacl::matrix vcl_m1_native(ublas_m1.size1(), ublas_m1.size2()); 
viennacl::matrix vcl_m1_large(4 * ublas_m1.size1(), 4 * ublas_m1.size2()); viennacl::matrix_range< viennacl::matrix > vcl_m1_range(vcl_m1_large, viennacl::range(8, ublas_m1.size1() + 8), viennacl::range(ublas_m1.size2(), 2 * ublas_m1.size2()) ); viennacl::matrix_slice< viennacl::matrix > vcl_m1_slice(vcl_m1_large, viennacl::slice(6, 2, ublas_m1.size1()), viennacl::slice(ublas_m1.size2(), 2, ublas_m1.size2()) ); viennacl::matrix vcl_m2_native(ublas_m2.size1(), ublas_m2.size2()); viennacl::matrix vcl_m2_large(4 * ublas_m2.size1(), 4 * ublas_m2.size2()); viennacl::matrix_range< viennacl::matrix > vcl_m2_range(vcl_m2_large, viennacl::range(8, ublas_m2.size1() + 8), viennacl::range(ublas_m2.size2(), 2 * ublas_m2.size2()) ); viennacl::matrix_slice< viennacl::matrix > vcl_m2_slice(vcl_m2_large, viennacl::slice(6, 2, ublas_m2.size1()), viennacl::slice(ublas_m2.size2(), 2, ublas_m2.size2()) ); /* std::cout << "Matrix resizing (to larger)" << std::endl; matrix.resize(2*num_rows, 2*num_cols, true); for (unsigned int i = 0; i < matrix.size1(); ++i) { for (unsigned int j = (i epsilon ) { std::cout << "# Error at operation: matrix resize (to larger)" << std::endl; std::cout << " diff: " << std::fabs(diff(matrix, vcl_matrix)) << std::endl; return EXIT_FAILURE; } matrix(12, 14) = NumericT(1.9); matrix(19, 16) = NumericT(1.0); matrix (13, 15) = NumericT(-9); vcl_matrix(12, 14) = NumericT(1.9); vcl_matrix(19, 16) = NumericT(1.0); vcl_matrix (13, 15) = NumericT(-9); std::cout << "Matrix resizing (to smaller)" << std::endl; matrix.resize(result.size(), rhs.size(), true); vcl_matrix.resize(result.size(), rhs.size(), true); if( std::fabs(diff(matrix, vcl_matrix)) > epsilon ) { std::cout << "# Error at operation: matrix resize (to smaller)" << std::endl; std::cout << " diff: " << std::fabs(diff(matrix, vcl_matrix)) << std::endl; return EXIT_FAILURE; } */ // // Run a bunch of tests for rank-1-updates, matrix-vector products // std::cout << "------------ Testing rank-1-updates and 
matrix-vector products ------------------" << std::endl; std::cout << "* m = full, v1 = full, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_native, vcl_v1_native, vcl_v2_native, vcl_m2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = full, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_native, vcl_v1_native, vcl_v2_range, vcl_m2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = full, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_native, vcl_v1_native, vcl_v2_slice, vcl_m2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; // v1 = range std::cout << "* m = full, v1 = range, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_native, vcl_v1_range, vcl_v2_native, vcl_m2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = range, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_native, vcl_v1_range, vcl_v2_range, vcl_m2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = range, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_native, vcl_v1_range, vcl_v2_slice, vcl_m2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; // v1 = slice std::cout << "* m = full, v1 = slice, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_native, vcl_v1_slice, vcl_v2_native, vcl_m2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = slice, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_native, vcl_v1_slice, vcl_v2_range, vcl_m2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = slice, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_native, vcl_v1_slice, vcl_v2_slice, vcl_m2_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; ///////////////////////////// matrix_range std::cout << "* m = range, v1 = full, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_range, vcl_v1_native, vcl_v2_native, vcl_m2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = full, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_range, vcl_v1_native, vcl_v2_range, vcl_m2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = full, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_range, vcl_v1_native, vcl_v2_slice, vcl_m2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; // v1 = range std::cout << "* m = range, v1 = range, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_range, vcl_v1_range, vcl_v2_native, vcl_m2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = range, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_range, vcl_v1_range, vcl_v2_range, vcl_m2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = range, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_range, vcl_v1_range, vcl_v2_slice, vcl_m2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; // v1 = slice std::cout << "* m = range, v1 = slice, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_range, vcl_v1_slice, vcl_v2_native, vcl_m2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = slice, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_range, vcl_v1_slice, vcl_v2_range, vcl_m2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = slice, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_range, vcl_v1_slice, vcl_v2_slice, vcl_m2_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; ///////////////////////////// matrix_slice std::cout << "* m = slice, v1 = full, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_slice, vcl_v1_native, vcl_v2_native, vcl_m2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = full, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_slice, vcl_v1_native, vcl_v2_range, vcl_m2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = full, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_slice, vcl_v1_native, vcl_v2_slice, vcl_m2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; // v1 = range std::cout << "* m = slice, v1 = range, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_slice, vcl_v1_range, vcl_v2_native, vcl_m2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = range, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_slice, vcl_v1_range, vcl_v2_range, vcl_m2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = range, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_slice, vcl_v1_range, vcl_v2_slice, vcl_m2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; // v1 = slice std::cout << "* m = slice, v1 = slice, v2 = full" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_slice, vcl_v1_slice, vcl_v2_native, vcl_m2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = slice, v2 = range" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_slice, vcl_v1_slice, vcl_v2_range, vcl_m2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = slice, v2 = slice" << std::endl; retval = test_prod_rank1(epsilon, ublas_m1, ublas_v1, ublas_v2, ublas_m2, vcl_m1_slice, vcl_v1_slice, vcl_v2_slice, vcl_m2_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; // // Testing triangular solve() routines // std::cout << "------------ Testing triangular solves ------------------" << std::endl; std::cout << "* m = full, v1 = full" << std::endl; retval = test_solve(epsilon, ublas_m2, ublas_v1, vcl_m2_native, vcl_v1_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = range" << std::endl; retval = test_solve(epsilon, ublas_m2, ublas_v1, vcl_m2_native, vcl_v1_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = full, v1 = slice" << std::endl; retval = test_solve(epsilon, ublas_m2, ublas_v1, vcl_m2_native, vcl_v1_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; ///////// matrix_range std::cout << "* m = range, v1 = full" << std::endl; retval = test_solve(epsilon, ublas_m2, ublas_v1, vcl_m2_range, vcl_v1_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = range" << std::endl; retval = test_solve(epsilon, ublas_m2, ublas_v1, vcl_m2_range, vcl_v1_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = range, v1 = slice" << std::endl; retval = test_solve(epsilon, ublas_m2, ublas_v1, vcl_m2_range, vcl_v1_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; //////// matrix_slice std::cout << "* m = slice, v1 = full" << std::endl; retval = test_solve(epsilon, ublas_m2, ublas_v1, vcl_m2_slice, vcl_v1_native); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = range" << std::endl; retval = test_solve(epsilon, ublas_m2, ublas_v1, vcl_m2_slice, vcl_v1_range); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! ---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; std::cout << "* m = slice, v1 = slice" << std::endl; retval = test_solve(epsilon, ublas_m2, ublas_v1, vcl_m2_slice, vcl_v1_slice); if (retval == EXIT_FAILURE) { std::cout << " --- FAILED! 
---" << std::endl; return retval; } else std::cout << " --- PASSED ---" << std::endl; ////////////// Final test for full LU decomposition: //full solver: std::cout << "Full solver" << std::endl; unsigned int lu_dim = 100; ublas::matrix square_matrix(lu_dim, lu_dim); ublas::vector lu_rhs(lu_dim); viennacl::matrix vcl_square_matrix(lu_dim, lu_dim); viennacl::vector vcl_lu_rhs(lu_dim); for (std::size_t i=0; i(0.5) * random(); //put some more weight on diagonal elements: for (std::size_t j=0; j(20.0) + random(); lu_rhs(j) = random(); } viennacl::copy(square_matrix, vcl_square_matrix); viennacl::copy(lu_rhs, vcl_lu_rhs); //ublas:: ublas::lu_factorize(square_matrix); ublas::inplace_solve (square_matrix, lu_rhs, ublas::unit_lower_tag ()); ublas::inplace_solve (square_matrix, lu_rhs, ublas::upper_tag ()); // ViennaCL: viennacl::linalg::lu_factorize(vcl_square_matrix); //viennacl::copy(square_matrix, vcl_square_matrix); viennacl::linalg::lu_substitute(vcl_square_matrix, vcl_lu_rhs); if( std::fabs(diff(lu_rhs, vcl_lu_rhs)) > epsilon ) { std::cout << "# Error at operation: dense solver" << std::endl; std::cout << " diff: " << std::fabs(diff(lu_rhs, vcl_lu_rhs)) << std::endl; retval = EXIT_FAILURE; } return retval; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Matrix" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = NumericT(1.0E-3); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << 
epsilon << std::endl; std::cout << " numeric: float" << std::endl; std::cout << " layout: row-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = NumericT(1.0E-3); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; std::cout << " layout: column-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-11; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; std::cout << " layout: row-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef double NumericT; NumericT epsilon = 1.0E-11; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; std::cout << " layout: column-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << 
std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/libviennacl_blas2.cpp000644 001750 001750 00000034265 12267307531 022006 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /* * * Testing the ViennaCL BLAS-like shared library * */ // include necessary system headers #include #include // Some helper functions for this tutorial: #include "viennacl.hpp" #include "viennacl/vector.hpp" template ScalarType diff(ScalarType const & s1, ScalarType const & s2) { if (s1 != s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } template ScalarType diff(std::vector const & v1, ViennaCLVectorType const & vcl_vec) { std::vector v2_cpu(vcl_vec.size()); viennacl::backend::finish(); viennacl::copy(vcl_vec, v2_cpu); ScalarType inf_norm = 0; for (unsigned int i=0;i 0 ) v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); else v2_cpu[i] = 0.0; if (v2_cpu[i] > inf_norm) inf_norm = v2_cpu[i]; } return inf_norm; } template void check(T const & t, U const & u, EpsilonT eps) { EpsilonT rel_error = diff(t,u); if (rel_error > eps) { std::cerr << "Relative error: " << rel_error << std::endl; std::cerr << "Aborting!" 
<< std::endl; exit(EXIT_FAILURE); } std::cout << "SUCCESS "; } int main() { std::size_t size1 = 13; // at least 7 std::size_t size2 = 11; // at least 7 float eps_float = 1e-5f; double eps_double = 1e-12; ViennaCLBackend my_backend; ViennaCLBackendCreate(&my_backend); std::vector ref_float_x(size1); for (std::size_t i=0; i(i); std::vector ref_float_y(size2); for (std::size_t i=0; i(size2 - i); std::vector ref_float_A(size1*size2); for (std::size_t i=0; i(3*i); std::vector ref_float_B(size1*size2); for (std::size_t i=0; i(2*i); std::vector ref_double_x(size1, 1.0); for (std::size_t i=0; i(i); std::vector ref_double_y(size2, 2.0); for (std::size_t i=0; i(size2 - i); std::vector ref_double_A(size1*size2, 3.0); for (std::size_t i=0; i(3*i); std::vector ref_double_B(size1*size2, 4.0); for (std::size_t i=0; i(2*i); // Host setup viennacl::vector host_float_x = viennacl::scalar_vector(size1, 1.0f, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i host_float_y = viennacl::scalar_vector(size2, 2.0f, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i host_float_A = viennacl::scalar_vector(size1*size2, 3.0f, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i host_float_B = viennacl::scalar_vector(size1*size2, 4.0f, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i host_double_x = viennacl::scalar_vector(size1, 1.0, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i host_double_y = viennacl::scalar_vector(size2, 2.0, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i host_double_A = viennacl::scalar_vector(size1*size2, 3.0, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i host_double_B = viennacl::scalar_vector(size1*size2, 4.0, viennacl::context(viennacl::MAIN_MEMORY)); for (std::size_t i=0; i cuda_float_x = viennacl::scalar_vector(size1, 1.0f, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i cuda_float_y = viennacl::scalar_vector(size2, 2.0f, 
viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i cuda_float_A = viennacl::scalar_vector(size1*size2, 3.0f, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i cuda_float_B = viennacl::scalar_vector(size1*size2, 4.0f, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i cuda_double_x = viennacl::scalar_vector(size1, 1.0, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i cuda_double_y = viennacl::scalar_vector(size2, 2.0, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i cuda_double_A = viennacl::scalar_vector(size1*size2, 3.0, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i cuda_double_B = viennacl::scalar_vector(size1*size2, 4.0, viennacl::context(viennacl::CUDA_MEMORY)); for (std::size_t i=0; i opencl_float_x = viennacl::scalar_vector(size1, 1.0f, viennacl::context(viennacl::ocl::get_context(context_id))); for (std::size_t i=0; i opencl_float_y = viennacl::scalar_vector(size2, 2.0f, viennacl::context(viennacl::ocl::get_context(context_id))); for (std::size_t i=0; i opencl_float_A = viennacl::scalar_vector(size1*size2, 3.0f, viennacl::context(viennacl::ocl::get_context(context_id))); for (std::size_t i=0; i opencl_float_B = viennacl::scalar_vector(size1*size2, 4.0f, viennacl::context(viennacl::ocl::get_context(context_id))); for (std::size_t i=0; i *opencl_double_x = NULL; viennacl::vector *opencl_double_y = NULL; viennacl::vector *opencl_double_A = NULL; viennacl::vector *opencl_double_B = NULL; if( viennacl::ocl::current_device().double_support() ) { opencl_double_x = new viennacl::vector(viennacl::scalar_vector(size1, 1.0, viennacl::context(viennacl::ocl::get_context(context_id)))); for (std::size_t i=0; i(viennacl::scalar_vector(size2, 2.0, viennacl::context(viennacl::ocl::get_context(context_id)))); for (std::size_t i=0; i(viennacl::scalar_vector(size1*size2, 3.0, viennacl::context(viennacl::ocl::get_context(context_id)))); for (std::size_t i=0; 
i(viennacl::scalar_vector(size1*size2, 4.0, viennacl::context(viennacl::ocl::get_context(context_id)))); for (std::size_t i=0; i(host_float_A), 2, 1, 2, 3, ViennaCLInt(size2), viennacl::linalg::host_based::detail::extract_raw_pointer(host_float_y), 1, 3, 0.1234f, viennacl::linalg::host_based::detail::extract_raw_pointer(host_float_x), 1, 2); check(ref_float_x, host_float_x, eps_float); ViennaCLHostDgemv(my_backend, ViennaCLRowMajor, ViennaCLNoTrans, ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_A), 2, 1, 2, 3, ViennaCLInt(size2), viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_y), 1, 3, 0.1234, viennacl::linalg::host_based::detail::extract_raw_pointer(host_double_x), 1, 2); check(ref_double_x, host_double_x, eps_double); #ifdef VIENNACL_WITH_CUDA std::cout << std::endl << "CUDA: "; ViennaCLCUDASgemv(my_backend, ViennaCLRowMajor, ViennaCLNoTrans, ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415f, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_A), 2, 1, 2, 3, size2, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_y), 1, 3, 0.1234f, viennacl::linalg::cuda::detail::cuda_arg(cuda_float_x), 1, 2); check(ref_float_x, cuda_float_x, eps_float); ViennaCLCUDADgemv(my_backend, ViennaCLRowMajor, ViennaCLNoTrans, ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_A), 2, 1, 2, 3, size2, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_y), 1, 3, 0.1234, viennacl::linalg::cuda::detail::cuda_arg(cuda_double_x), 1, 2); check(ref_double_x, cuda_double_x, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL std::cout << std::endl << "OpenCL: "; ViennaCLOpenCLSgemv(my_backend, ViennaCLRowMajor, ViennaCLNoTrans, ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415f, viennacl::traits::opencl_handle(opencl_float_A), 2, 1, 2, 3, ViennaCLInt(size2), viennacl::traits::opencl_handle(opencl_float_y), 1, 3, 0.1234f, 
viennacl::traits::opencl_handle(opencl_float_x), 1, 2); check(ref_float_x, opencl_float_x, eps_float); if( viennacl::ocl::current_device().double_support() ) { ViennaCLOpenCLDgemv(my_backend, ViennaCLRowMajor, ViennaCLNoTrans, ViennaCLInt(size1/3), ViennaCLInt(size2/4), 3.1415, viennacl::traits::opencl_handle(*opencl_double_A), 2, 1, 2, 3, ViennaCLInt(size2), viennacl::traits::opencl_handle(*opencl_double_y), 1, 3, 0.1234, viennacl::traits::opencl_handle(*opencl_double_x), 1, 2); check(ref_double_x, *opencl_double_x, eps_double); } #endif #ifdef VIENNACL_WITH_OPENCL delete opencl_double_x; delete opencl_double_y; delete opencl_double_A; delete opencl_double_B; #endif ViennaCLBackendDestroy(&my_backend); // // That's it. // std::cout << std::endl << "!!!! TEST COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/libviennacl_blas3.cu000644 001750 001750 00000066505 12267307531 021636 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /* * * Testing the ViennaCL BLAS-like shared library * */ // include necessary system headers #include #include // Some helper functions for this tutorial: #include "viennacl.hpp" #include "examples/tutorial/Random.hpp" #include "viennacl/vector.hpp" template ScalarType diff(ScalarType const & s1, ScalarType const & s2) { if (s1 != s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } template ScalarType diff(std::vector const & v1, ViennaCLVectorType const & vcl_vec) { std::vector v2_cpu(vcl_vec.size()); viennacl::backend::finish(); viennacl::copy(vcl_vec, v2_cpu); ScalarType inf_norm = 0; for (unsigned int i=0;i 0 ) v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); else v2_cpu[i] = 0.0; if (v2_cpu[i] > inf_norm) inf_norm = v2_cpu[i]; } return inf_norm; } template void check(T const & t, U const & u, EpsilonT eps) { EpsilonT rel_error = diff(t,u); if (rel_error > eps) { std::cerr << "Relative error: " << rel_error << std::endl; std::cerr << "Aborting!" 
<< std::endl; exit(EXIT_FAILURE); } std::cout << "SUCCESS "; } template T get_value(std::vector & array, ViennaCLInt i, ViennaCLInt j, ViennaCLInt start1, ViennaCLInt start2, ViennaCLInt stride1, ViennaCLInt stride2, ViennaCLInt rows, ViennaCLInt cols, ViennaCLOrder order, ViennaCLTranspose trans) { // row-major if (order == ViennaCLRowMajor && trans == ViennaCLTrans) return array[(j*stride1 + start1) * cols + (i*stride2 + start2)]; else if (order == ViennaCLRowMajor && trans != ViennaCLTrans) return array[(i*stride1 + start1) * cols + (j*stride2 + start2)]; // column-major else if (order != ViennaCLRowMajor && trans == ViennaCLTrans) return array[(j*stride1 + start1) + (i*stride2 + start2) * rows]; return array[(i*stride1 + start1) + (j*stride2 + start2) * rows]; } void test_blas(ViennaCLBackend my_backend, float eps_float, double eps_double, std::vector & C_float, std::vector & C_double, std::vector & A_float, std::vector & A_double, std::vector & B_float, std::vector & B_double, ViennaCLOrder order_C, ViennaCLOrder order_A, ViennaCLOrder order_B, ViennaCLTranspose trans_A, ViennaCLTranspose trans_B, viennacl::vector & host_C_float, viennacl::vector & host_C_double, viennacl::vector & host_A_float, viennacl::vector & host_A_double, viennacl::vector & host_B_float, viennacl::vector & host_B_double #ifdef VIENNACL_WITH_CUDA , viennacl::vector & cuda_C_float, viennacl::vector & cuda_C_double , viennacl::vector & cuda_A_float, viennacl::vector & cuda_A_double , viennacl::vector & cuda_B_float, viennacl::vector & cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , viennacl::vector & opencl_C_float, viennacl::vector * opencl_C_double , viennacl::vector & opencl_A_float, viennacl::vector * opencl_A_double , viennacl::vector & opencl_B_float, viennacl::vector * opencl_B_double #endif ) { ViennaCLInt C_size1 = 42; ViennaCLInt C_size2 = 43; ViennaCLInt C_start1 = 10; ViennaCLInt C_start2 = 11; ViennaCLInt C_stride1 = 2; ViennaCLInt C_stride2 = 3; ViennaCLInt C_rows = 
C_size1 * C_stride1 + C_start1 + 5; ViennaCLInt C_columns = C_size2 * C_stride2 + C_start2 + 5; ViennaCLInt A_size1 = trans_A ? 44 : 42; ViennaCLInt A_size2 = trans_A ? 42 : 44; ViennaCLInt A_start1 = 12; ViennaCLInt A_start2 = 13; ViennaCLInt A_stride1 = 4; ViennaCLInt A_stride2 = 5; ViennaCLInt A_rows = A_size1 * A_stride1 + A_start1 + 5; ViennaCLInt A_columns = A_size2 * A_stride2 + A_start2 + 5; ViennaCLInt B_size1 = trans_B ? 43 : 44; ViennaCLInt B_size2 = trans_B ? 44 : 43; ViennaCLInt B_start1 = 14; ViennaCLInt B_start2 = 15; ViennaCLInt B_stride1 = 6; ViennaCLInt B_stride2 = 7; ViennaCLInt B_rows = B_size1 * B_stride1 + B_start1 + 5; ViennaCLInt B_columns = B_size2 * B_stride2 + B_start2 + 5; // Compute reference: ViennaCLInt size_k = trans_A ? A_size1 : A_size2; for (ViennaCLInt i=0; i(host_A_float), A_start1, A_start2, A_stride1, A_stride2, (order_A == ViennaCLRowMajor) ? A_columns : A_rows, viennacl::linalg::host_based::detail::extract_raw_pointer(host_B_float), B_start1, B_start2, B_stride1, B_stride2, (order_B == ViennaCLRowMajor) ? B_columns : B_rows, 0.0f, viennacl::linalg::host_based::detail::extract_raw_pointer(host_C_float), C_start1, C_start2, C_stride1, C_stride2, (order_C == ViennaCLRowMajor) ? C_columns : C_rows); check(C_float, host_C_float, eps_float); ViennaCLHostDgemm(my_backend, order_A, trans_A, order_B, trans_B, order_C, C_size1, C_size2, size_k, 1.0, viennacl::linalg::host_based::detail::extract_raw_pointer(host_A_double), A_start1, A_start2, A_stride1, A_stride2, (order_A == ViennaCLRowMajor) ? A_columns : A_rows, viennacl::linalg::host_based::detail::extract_raw_pointer(host_B_double), B_start1, B_start2, B_stride1, B_stride2, (order_B == ViennaCLRowMajor) ? B_columns : B_rows, 0.0, viennacl::linalg::host_based::detail::extract_raw_pointer(host_C_double), C_start1, C_start2, C_stride1, C_stride2, (order_C == ViennaCLRowMajor) ? 
C_columns : C_rows); check(C_double, host_C_double, eps_double); #ifdef VIENNACL_WITH_CUDA ViennaCLCUDASgemm(my_backend, order_A, trans_A, order_B, trans_B, order_C, C_size1, C_size2, size_k, 1.0f, viennacl::linalg::cuda::detail::cuda_arg(cuda_A_float), A_start1, A_start2, A_stride1, A_stride2, (order_A == ViennaCLRowMajor) ? A_columns : A_rows, viennacl::linalg::cuda::detail::cuda_arg(cuda_B_float), B_start1, B_start2, B_stride1, B_stride2, (order_B == ViennaCLRowMajor) ? B_columns : B_rows, 0.0f, viennacl::linalg::cuda::detail::cuda_arg(cuda_C_float), C_start1, C_start2, C_stride1, C_stride2, (order_C == ViennaCLRowMajor) ? C_columns : C_rows); check(C_float, cuda_C_float, eps_float); ViennaCLCUDADgemm(my_backend, order_A, trans_A, order_B, trans_B, order_C, C_size1, C_size2, size_k, 1.0, viennacl::linalg::cuda::detail::cuda_arg(cuda_A_double), A_start1, A_start2, A_stride1, A_stride2, (order_A == ViennaCLRowMajor) ? A_columns : A_rows, viennacl::linalg::cuda::detail::cuda_arg(cuda_B_double), B_start1, B_start2, B_stride1, B_stride2, (order_B == ViennaCLRowMajor) ? B_columns : B_rows, 0.0, viennacl::linalg::cuda::detail::cuda_arg(cuda_C_double), C_start1, C_start2, C_stride1, C_stride2, (order_C == ViennaCLRowMajor) ? C_columns : C_rows); check(C_double, cuda_C_double, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL ViennaCLOpenCLSgemm(my_backend, order_A, trans_A, order_B, trans_B, order_C, C_size1, C_size2, size_k, 1.0f, viennacl::traits::opencl_handle(opencl_A_float), A_start1, A_start2, A_stride1, A_stride2, (order_A == ViennaCLRowMajor) ? A_columns : A_rows, viennacl::traits::opencl_handle(opencl_B_float), B_start1, B_start2, B_stride1, B_stride2, (order_B == ViennaCLRowMajor) ? B_columns : B_rows, 0.0f, viennacl::traits::opencl_handle(opencl_C_float), C_start1, C_start2, C_stride1, C_stride2, (order_C == ViennaCLRowMajor) ? 
C_columns : C_rows); check(C_float, opencl_C_float, eps_float); if (opencl_A_double != NULL && opencl_B_double != NULL && opencl_C_double != NULL) { ViennaCLOpenCLDgemm(my_backend, order_A, trans_A, order_B, trans_B, order_C, C_size1, C_size2, size_k, 1.0, viennacl::traits::opencl_handle(*opencl_A_double), A_start1, A_start2, A_stride1, A_stride2, (order_A == ViennaCLRowMajor) ? A_columns : A_rows, viennacl::traits::opencl_handle(*opencl_B_double), B_start1, B_start2, B_stride1, B_stride2, (order_B == ViennaCLRowMajor) ? B_columns : B_rows, 0.0, viennacl::traits::opencl_handle(*opencl_C_double), C_start1, C_start2, C_stride1, C_stride2, (order_C == ViennaCLRowMajor) ? C_columns : C_rows); check(C_double, *opencl_C_double, eps_double); } #endif std::cout << std::endl; } void test_blas(ViennaCLBackend my_backend, float eps_float, double eps_double, std::vector & C_float, std::vector & C_double, std::vector & A_float, std::vector & A_double, std::vector & B_float, std::vector & B_double, ViennaCLOrder order_C, ViennaCLOrder order_A, ViennaCLOrder order_B, viennacl::vector & host_C_float, viennacl::vector & host_C_double, viennacl::vector & host_A_float, viennacl::vector & host_A_double, viennacl::vector & host_B_float, viennacl::vector & host_B_double #ifdef VIENNACL_WITH_CUDA , viennacl::vector & cuda_C_float, viennacl::vector & cuda_C_double , viennacl::vector & cuda_A_float, viennacl::vector & cuda_A_double , viennacl::vector & cuda_B_float, viennacl::vector & cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , viennacl::vector & opencl_C_float, viennacl::vector * opencl_C_double , viennacl::vector & opencl_A_float, viennacl::vector * opencl_A_double , viennacl::vector & opencl_B_float, viennacl::vector * opencl_B_double #endif ) { std::cout << " -> trans-trans: "; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, order_C, order_A, order_B, ViennaCLTrans, ViennaCLTrans, host_C_float, host_C_double, host_A_float, 
host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); std::cout << " -> trans-no: "; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, order_C, order_A, order_B, ViennaCLTrans, ViennaCLNoTrans, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); std::cout << " -> no-trans: "; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, order_C, order_A, order_B, ViennaCLNoTrans, ViennaCLTrans, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); std::cout << " -> no-no: "; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, order_C, order_A, order_B, ViennaCLNoTrans, ViennaCLNoTrans, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); } void test_blas(ViennaCLBackend my_backend, float eps_float, double eps_double, 
std::vector & C_float, std::vector & C_double, std::vector & A_float, std::vector & A_double, std::vector & B_float, std::vector & B_double, viennacl::vector & host_C_float, viennacl::vector & host_C_double, viennacl::vector & host_A_float, viennacl::vector & host_A_double, viennacl::vector & host_B_float, viennacl::vector & host_B_double #ifdef VIENNACL_WITH_CUDA , viennacl::vector & cuda_C_float, viennacl::vector & cuda_C_double , viennacl::vector & cuda_A_float, viennacl::vector & cuda_A_double , viennacl::vector & cuda_B_float, viennacl::vector & cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , viennacl::vector & opencl_C_float, viennacl::vector * opencl_C_double , viennacl::vector & opencl_A_float, viennacl::vector * opencl_A_double , viennacl::vector & opencl_B_float, viennacl::vector * opencl_B_double #endif ) { std::cout << " -> C: row, A: row, B: row" << std::endl; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, ViennaCLRowMajor, ViennaCLRowMajor, ViennaCLRowMajor, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); std::cout << " -> C: row, A: row, B: col" << std::endl; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, ViennaCLRowMajor, ViennaCLRowMajor, ViennaCLColumnMajor, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); std::cout << " -> C: row, A: col, B: row" << 
std::endl; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, ViennaCLRowMajor, ViennaCLColumnMajor, ViennaCLRowMajor, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); std::cout << " -> C: row, A: col, B: col" << std::endl; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, ViennaCLRowMajor, ViennaCLColumnMajor, ViennaCLColumnMajor, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); std::cout << " -> C: col, A: row, B: row" << std::endl; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, ViennaCLColumnMajor, ViennaCLRowMajor, ViennaCLRowMajor, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); std::cout << " -> C: col, A: row, B: col" << std::endl; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, ViennaCLColumnMajor, ViennaCLRowMajor, ViennaCLColumnMajor, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, 
cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); std::cout << " -> C: col, A: col, B: row" << std::endl; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, ViennaCLColumnMajor, ViennaCLColumnMajor, ViennaCLRowMajor, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); std::cout << " -> C: col, A: col, B: col" << std::endl; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, ViennaCLColumnMajor, ViennaCLColumnMajor, ViennaCLColumnMajor, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double, cuda_A_float, cuda_A_double, cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double, opencl_A_float, opencl_A_double, opencl_B_float, opencl_B_double #endif ); } int main() { ViennaCLInt size = 500*500; float eps_float = 1e-5f; double eps_double = 1e-12; std::vector C_float(size); std::vector A_float(size); std::vector B_float(size); std::vector C_double(size); std::vector A_double(size); std::vector B_double(size); // fill with random data: for (ViennaCLInt i = 0; i < size; ++i) { C_float[i] = 0.5f + 0.1f * random(); A_float[i] = 0.5f + 0.1f * random(); B_float[i] = 0.5f + 0.1f * random(); C_double[i] = 0.5 + 0.2 * random(); A_double[i] = 0.5 + 0.2 * random(); B_double[i] = 0.5 + 0.2 * random(); } // Host setup ViennaCLBackend my_backend; ViennaCLBackendCreate(&my_backend); viennacl::vector host_C_float(size, 
viennacl::context(viennacl::MAIN_MEMORY)); viennacl::copy(C_float, host_C_float); viennacl::vector host_A_float(size, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::copy(A_float, host_A_float); viennacl::vector host_B_float(size, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::copy(B_float, host_B_float); viennacl::vector host_C_double(size, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::copy(C_double, host_C_double); viennacl::vector host_A_double(size, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::copy(A_double, host_A_double); viennacl::vector host_B_double(size, viennacl::context(viennacl::MAIN_MEMORY)); viennacl::copy(B_double, host_B_double); // CUDA setup #ifdef VIENNACL_WITH_CUDA viennacl::vector cuda_C_float(size, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::copy(C_float, cuda_C_float); viennacl::vector cuda_A_float(size, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::copy(A_float, cuda_A_float); viennacl::vector cuda_B_float(size, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::copy(B_float, cuda_B_float); viennacl::vector cuda_C_double(size, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::copy(C_double, cuda_C_double); viennacl::vector cuda_A_double(size, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::copy(A_double, cuda_A_double); viennacl::vector cuda_B_double(size, viennacl::context(viennacl::CUDA_MEMORY)); viennacl::copy(B_double, cuda_B_double); #endif // OpenCL setup #ifdef VIENNACL_WITH_OPENCL ViennaCLInt context_id = 0; viennacl::vector opencl_C_float(size, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::copy(C_float, opencl_C_float); viennacl::vector opencl_A_float(size, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::copy(A_float, opencl_A_float); viennacl::vector opencl_B_float(size, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::copy(B_float, opencl_B_float); viennacl::vector *opencl_C_double = NULL; viennacl::vector 
*opencl_A_double = NULL; viennacl::vector *opencl_B_double = NULL; if( viennacl::ocl::current_device().double_support() ) { opencl_C_double = new viennacl::vector(size, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::copy(C_double, *opencl_C_double); opencl_A_double = new viennacl::vector(size, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::copy(A_double, *opencl_A_double); opencl_B_double = new viennacl::vector(size, viennacl::context(viennacl::ocl::get_context(context_id))); viennacl::copy(B_double, *opencl_B_double); } ViennaCLBackendSetOpenCLContextID(my_backend, context_id); #endif // consistency checks: check(C_float, host_C_float, eps_float); check(A_float, host_A_float, eps_float); check(B_float, host_B_float, eps_float); check(C_double, host_C_double, eps_double); check(A_double, host_A_double, eps_double); check(B_double, host_B_double, eps_double); #ifdef VIENNACL_WITH_CUDA check(C_float, cuda_C_float, eps_float); check(A_float, cuda_A_float, eps_float); check(B_float, cuda_B_float, eps_float); check(C_double, cuda_C_double, eps_double); check(A_double, cuda_A_double, eps_double); check(B_double, cuda_B_double, eps_double); #endif #ifdef VIENNACL_WITH_OPENCL check(C_float, opencl_C_float, eps_float); check(A_float, opencl_A_float, eps_float); check(B_float, opencl_B_float, eps_float); if( viennacl::ocl::current_device().double_support() ) { check(C_double, *opencl_C_double, eps_double); check(A_double, *opencl_A_double, eps_double); check(B_double, *opencl_B_double, eps_double); } #endif std::cout << std::endl; test_blas(my_backend, eps_float, eps_double, C_float, C_double, A_float, A_double, B_float, B_double, host_C_float, host_C_double, host_A_float, host_A_double, host_B_float, host_B_double #ifdef VIENNACL_WITH_CUDA , cuda_C_float, cuda_C_double , cuda_A_float, cuda_A_double , cuda_B_float, cuda_B_double #endif #ifdef VIENNACL_WITH_OPENCL , opencl_C_float, opencl_C_double , opencl_A_float, opencl_A_double 
, opencl_B_float, opencl_B_double #endif ); #ifdef VIENNACL_WITH_OPENCL //cleanup if( viennacl::ocl::current_device().double_support() ) { delete opencl_C_double; delete opencl_A_double; delete opencl_B_double; } #endif ViennaCLBackendDestroy(&my_backend); // // That's it. // std::cout << std::endl << "!!!! TEST COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; } ViennaCL-1.5.1-src/tests/src/spmdm.cu000644 001750 001750 00000027115 12267307531 017376 0ustar00rupprupp000000 000000 // // include necessary system headers // #include #include // // ublas includes // #include #include #include #include #include #include #include #include #include // Must be set if you want to use ViennaCL algorithms on ublas objects #define VIENNACL_WITH_UBLAS 1 //#define VIENNACL_WITH_OPENCL 1 //#define VIENNACL_WITH_CUDA 1 //#define VIENNACL_DEBUG_KERNEL 1 //#define VIENNACL_BUILD_INFO 1 // // ViennaCL includes // #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "viennacl/compressed_matrix.hpp" #include "viennacl/coordinate_matrix.hpp" #include "viennacl/ell_matrix.hpp" #include "viennacl/hyb_matrix.hpp" #include "viennacl/linalg/prod.hpp" //generic matrix-vector product #include "viennacl/linalg/norm_2.hpp" //generic l2-norm for vectors #include "viennacl/io/matrix_market.hpp" // Some helper functions for this tutorial: #include "Random.hpp" using namespace boost::numeric; template < typename ScalarType > int check_matrices(const ublas::matrix< ScalarType >& ref_mat, const ublas::matrix< ScalarType >& mat, ScalarType eps) { std::size_t size1, size2; size1 = ref_mat.size1(); size2 = ref_mat.size2(); if( (size1 != mat.size1()) || (size2 != mat.size2()) ) return EXIT_FAILURE; for (unsigned int i = 0; i < size1; i++) for (unsigned int j = 0; j < size2; j++) { ScalarType rel_error = std::abs(ref_mat(i,j) - mat(i,j)) / std::max(std::abs(ref_mat(i,j)), std::abs(mat(i,j))); if ( 
rel_error > eps ) { std::cout << "ERROR: Verification failed at (" << i <<", "<< j << "): " << " Expected: " << ref_mat(i,j) << ", got: " << mat(i,j) << " (relative error: " << rel_error << ")" << std::endl; return EXIT_FAILURE; } } std::cout << "Everything went well!" << std::endl; return EXIT_SUCCESS; } template int test(NumericT epsilon) { int retVal = EXIT_SUCCESS; ublas::compressed_matrix ublas_lhs; if (viennacl::io::read_matrix_market_file(ublas_lhs, "../../examples/testdata/mat65k.mtx") == EXIT_FAILURE) { std::cout << "Error reading Matrix file" << std::endl; return EXIT_FAILURE; } // add some extra weight to diagonal in order to avoid issues with round-off errors for (std::size_t i=0; i compressed_lhs; viennacl::ell_matrix ell_lhs; viennacl::coordinate_matrix coo_lhs; viennacl::hyb_matrix hyb_lhs; ublas::matrix ublas_result; viennacl::matrix result; viennacl::copy( ublas_lhs, compressed_lhs); viennacl::copy( ublas_lhs, ell_lhs); viennacl::copy( ublas_lhs, coo_lhs); viennacl::copy( ublas_lhs, hyb_lhs); ublas::matrix ublas_rhs1(ublas_lhs.size2(), cols_rhs); viennacl::matrix rhs1(ublas_lhs.size2(), cols_rhs); ublas::matrix ublas_rhs2; viennacl::matrix rhs2; ublas::matrix temp(ublas_rhs1.size1(), cols_rhs); for (unsigned int i = 0; i < ublas_rhs1.size1(); i++) for (unsigned int j = 0; j < ublas_rhs1.size2(); j++) ublas_rhs1(i,j) = NumericT(0.5) + NumericT(0.1) * random(); viennacl::copy( ublas_rhs1, rhs1); ublas_rhs2 = ublas::trans( ublas_rhs1); viennacl::copy( ublas_rhs2, rhs2); /* gold result */ ublas_result = ublas::prod( ublas_lhs, ublas_rhs1); /******************************************************************/ std::cout << "Testing compressed(CSR) lhs * dense rhs" << std::endl; result = viennacl::linalg::prod( compressed_lhs, rhs1); temp.clear(); viennacl::copy( result, temp); retVal = check_matrices(ublas_result, temp, epsilon); /******************************************************************/ std::cout << "Testing compressed(ELL) lhs * dense rhs" << 
std::endl; result.clear(); result = viennacl::linalg::prod( ell_lhs, rhs1); temp.clear(); viennacl::copy( result, temp); check_matrices(ublas_result, temp, epsilon); /******************************************************************/ std::cout << "Testing compressed(COO) lhs * dense rhs" << std::endl; result.clear(); result = viennacl::linalg::prod( coo_lhs, rhs1); temp.clear(); viennacl::copy( result, temp); check_matrices(ublas_result, temp, epsilon); /******************************************************************/ std::cout << "Testing compressed(HYB) lhs * dense rhs" << std::endl; result.clear(); result = viennacl::linalg::prod( hyb_lhs, rhs1); temp.clear(); viennacl::copy( result, temp); check_matrices(ublas_result, temp, epsilon); /******************************************************************/ /* gold result */ ublas_result = ublas::prod( ublas_lhs, ublas::trans(ublas_rhs2)); /******************************************************************/ std::cout << std::endl << "Testing compressed(CSR) lhs * transposed dense rhs:" << std::endl; result.clear(); result = viennacl::linalg::prod( compressed_lhs, viennacl::trans(rhs2)); temp.clear(); viennacl::copy( result, temp); retVal = check_matrices(ublas_result, temp, epsilon); /******************************************************************/ std::cout << "Testing compressed(ELL) lhs * transposed dense rhs" << std::endl; result.clear(); result = viennacl::linalg::prod( ell_lhs, viennacl::trans(rhs2)); temp.clear(); viennacl::copy( result, temp); check_matrices(ublas_result, temp, epsilon); /******************************************************************/ std::cout << "Testing compressed(COO) lhs * transposed dense rhs" << std::endl; result.clear(); result = viennacl::linalg::prod( coo_lhs, viennacl::trans(rhs2)); temp.clear(); viennacl::copy( result, temp); check_matrices(ublas_result, temp, epsilon); /******************************************************************/ std::cout << "Testing 
compressed(HYB) lhs * dense rhs" << std::endl; result.clear(); result = viennacl::linalg::prod( hyb_lhs, viennacl::trans(rhs2)); temp.clear(); viennacl::copy( result, temp); check_matrices(ublas_result, temp, epsilon); /******************************************************************/ if(retVal == EXIT_SUCCESS) { std::cout << "Tests passed successfully" << std::endl; } return retVal; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Sparse-Dense Matrix Multiplication" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = static_cast(1E-4); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; std::cout << " layout: row-major, row-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; std::cout << " layout: row-major, column-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; std::cout << " layout: column-major, row-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) 
std::cout << "# Test passed" << std::endl; else return retval; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; std::cout << " layout: column-major, column-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-12; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; std::cout << " layout: row-major, row-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; std::cout << " layout: row-major, column-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; std::cout << " layout: column-major, row-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; std::cout << " layout: column-major, column-major" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << 
"----------------------------------------------" << std::endl; std::cout << std::endl; } #ifdef VIENNACL_WITH_OPENCL else std::cout << "No double precision support, skipping test..." << std::endl; #endif std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/blas3_solve_float_double.hpp000644 001750 001750 00000044235 12267307531 023373 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ //#define NDEBUG //#define VIENNACL_DEBUG_BUILD // We don't need debug mode in UBLAS: #define BOOST_UBLAS_NDEBUG // // *** System // #include // // *** Boost // #include #include #include #include #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL //#define VIENNACL_DEBUG_BUILD #define VIENNACL_WITH_UBLAS 1 #include "viennacl/scalar.hpp" #include "viennacl/matrix.hpp" #include "viennacl/matrix_proxy.hpp" #include "viennacl/vector.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "examples/tutorial/Random.hpp" // // ------------------------------------------------------------- // using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType & s1, viennacl::scalar & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / 
std::max(std::fabs(s1), std::fabs(s2)); return 0; } template ScalarType diff(ublas::vector & v1, viennacl::vector & v2) { ublas::vector v2_cpu(v2.size()); viennacl::backend::finish(); viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin()); viennacl::backend::finish(); for (std::size_t i=0;i 0 ) v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); else v2_cpu[i] = 0.0; } return norm_inf(v2_cpu); } template ScalarType diff(ublas::matrix & mat1, VCLMatrixType & mat2) { ublas::matrix mat2_cpu(mat2.size1(), mat2.size2()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(mat2, mat2_cpu); ScalarType ret = 0; ScalarType act = 0; for (unsigned int i = 0; i < mat2_cpu.size1(); ++i) { for (unsigned int j = 0; j < mat2_cpu.size2(); ++j) { act = std::fabs(mat2_cpu(i,j) - mat1(i,j)) / std::max( std::fabs(mat2_cpu(i, j)), std::fabs(mat1(i,j)) ); if (act > ret) ret = act; } } //std::cout << ret << std::endl; return ret; } // // Triangular solvers // template void run_solver_check(RHSTypeRef & B_ref, RHSTypeCheck & B_check, int & retval, Epsilon const & epsilon) { double act_diff = std::fabs(diff(B_ref, B_check)); if( act_diff > epsilon ) { std::cout << " FAILED!" << std::endl; std::cout << "# Error at operation: matrix-matrix solve" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << " passed! 
" << act_diff << std::endl; } template< typename NumericT, typename Epsilon, typename ReferenceMatrixTypeA, typename ReferenceMatrixTypeB, typename ReferenceMatrixTypeC, typename MatrixTypeA, typename MatrixTypeB, typename MatrixTypeC, typename MatrixTypeResult> int test_solve(Epsilon const& epsilon, ReferenceMatrixTypeA const & A, ReferenceMatrixTypeB const & B_start, ReferenceMatrixTypeC const & C_start, MatrixTypeA const & vcl_A, MatrixTypeB & vcl_B, MatrixTypeC & vcl_C, MatrixTypeResult const & ) { int retval = EXIT_SUCCESS; // -------------------------------------------------------------------------- ReferenceMatrixTypeA result; ReferenceMatrixTypeC C_trans; ReferenceMatrixTypeB B = B_start; ReferenceMatrixTypeC C = C_start; MatrixTypeResult vcl_result; // Test: A \ B with various tags -------------------------------------------------------------------------- std::cout << "Testing A \\ B: " << std::endl; std::cout << " * upper_tag: "; result = ublas::solve(A, B, ublas::upper_tag()); vcl_result = viennacl::linalg::solve(vcl_A, vcl_B, viennacl::linalg::upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * unit_upper_tag: "; result = ublas::solve(A, B, ublas::unit_upper_tag()); vcl_result = viennacl::linalg::solve(vcl_A, vcl_B, viennacl::linalg::unit_upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * lower_tag: "; result = ublas::solve(A, B, ublas::lower_tag()); vcl_result = viennacl::linalg::solve(vcl_A, vcl_B, viennacl::linalg::lower_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * unit_lower_tag: "; result = ublas::solve(A, B, ublas::unit_lower_tag()); vcl_result = viennacl::linalg::solve(vcl_A, vcl_B, viennacl::linalg::unit_lower_tag()); run_solver_check(result, vcl_result, retval, epsilon); if (retval == EXIT_SUCCESS) std::cout << "Test A \\ B passed!" 
<< std::endl; B = B_start; C = C_start; // Test: A \ B^T -------------------------------------------------------------------------- std::cout << "Testing A \\ B^T: " << std::endl; std::cout << " * upper_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); //check solve(): result = ublas::solve(A, C_trans, ublas::upper_tag()); vcl_result = viennacl::linalg::solve(vcl_A, trans(vcl_C), viennacl::linalg::upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); //check compute kernels: std::cout << " * upper_tag: "; ublas::inplace_solve(A, C_trans, ublas::upper_tag()); viennacl::linalg::inplace_solve(vcl_A, trans(vcl_C), viennacl::linalg::upper_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * unit_upper_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(A, C_trans, ublas::unit_upper_tag()); viennacl::linalg::inplace_solve(vcl_A, trans(vcl_C), viennacl::linalg::unit_upper_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * lower_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(A, C_trans, ublas::lower_tag()); viennacl::linalg::inplace_solve(vcl_A, trans(vcl_C), viennacl::linalg::lower_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * unit_lower_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(A, C_trans, ublas::unit_lower_tag()); viennacl::linalg::inplace_solve(vcl_A, trans(vcl_C), viennacl::linalg::unit_lower_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); if (retval == EXIT_SUCCESS) std::cout << "Test A \\ B^T passed!" 
<< std::endl; B = B_start; C = C_start; // Test: A \ B with various tags -------------------------------------------------------------------------- std::cout << "Testing A^T \\ B: " << std::endl; std::cout << " * upper_tag: "; viennacl::copy(B, vcl_B); result = ublas::solve(trans(A), B, ublas::upper_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), vcl_B, viennacl::linalg::upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * unit_upper_tag: "; viennacl::copy(B, vcl_B); result = ublas::solve(trans(A), B, ublas::unit_upper_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), vcl_B, viennacl::linalg::unit_upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * lower_tag: "; viennacl::copy(B, vcl_B); result = ublas::solve(trans(A), B, ublas::lower_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), vcl_B, viennacl::linalg::lower_tag()); run_solver_check(result, vcl_result, retval, epsilon); std::cout << " * unit_lower_tag: "; viennacl::copy(B, vcl_B); result = ublas::solve(trans(A), B, ublas::unit_lower_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), vcl_B, viennacl::linalg::unit_lower_tag()); run_solver_check(result, vcl_result, retval, epsilon); if (retval == EXIT_SUCCESS) std::cout << "Test A^T \\ B passed!" 
<< std::endl; B = B_start; C = C_start; // Test: A^T \ B^T -------------------------------------------------------------------------- std::cout << "Testing A^T \\ B^T: " << std::endl; std::cout << " * upper_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); //check solve(): result = ublas::solve(trans(A), C_trans, ublas::upper_tag()); vcl_result = viennacl::linalg::solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::upper_tag()); run_solver_check(result, vcl_result, retval, epsilon); //check kernels: std::cout << " * upper_tag: "; ublas::inplace_solve(trans(A), C_trans, ublas::upper_tag()); viennacl::linalg::inplace_solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::upper_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * unit_upper_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(trans(A), C_trans, ublas::unit_upper_tag()); viennacl::linalg::inplace_solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::unit_upper_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * lower_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(trans(A), C_trans, ublas::lower_tag()); viennacl::linalg::inplace_solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::lower_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); std::cout << " * unit_lower_tag: "; viennacl::copy(C, vcl_C); C_trans = trans(C); ublas::inplace_solve(trans(A), C_trans, ublas::unit_lower_tag()); viennacl::linalg::inplace_solve(trans(vcl_A), trans(vcl_C), viennacl::linalg::unit_lower_tag()); C = trans(C_trans); run_solver_check(C, vcl_C, retval, epsilon); if (retval == EXIT_SUCCESS) std::cout << "Test A^T \\ B^T passed!" 
<< std::endl; return retval; } template< typename NumericT, typename F_A, typename F_B, typename Epsilon > int test_solve(Epsilon const& epsilon) { int ret = EXIT_SUCCESS; long matrix_size = 135; //some odd number, not too large long rhs_num = 67; std::cout << "--- Part 2: Testing matrix-matrix solver ---" << std::endl; ublas::matrix A(matrix_size, matrix_size); ublas::matrix B_start(matrix_size, rhs_num); ublas::matrix C_start(rhs_num, matrix_size); for (std::size_t i = 0; i < A.size1(); ++i) { for (std::size_t j = 0; j < A.size2(); ++j) A(i,j) = static_cast(-0.5) * random(); A(i,i) = NumericT(1.0) + NumericT(2.0) * random(); //some extra weight on diagonal for stability } for (std::size_t i = 0; i < B_start.size1(); ++i) for (std::size_t j = 0; j < B_start.size2(); ++j) B_start(i,j) = random(); for (std::size_t i = 0; i < C_start.size1(); ++i) for (std::size_t j = 0; j < C_start.size2(); ++j) C_start(i,j) = random(); // A viennacl::range range1_A(matrix_size, 2*matrix_size); viennacl::range range2_A(2*matrix_size, 3*matrix_size); viennacl::slice slice1_A(matrix_size, 2, matrix_size); viennacl::slice slice2_A(0, 3, matrix_size); viennacl::matrix vcl_A(matrix_size, matrix_size); viennacl::copy(A, vcl_A); viennacl::matrix vcl_big_range_A(4*matrix_size, 4*matrix_size); viennacl::matrix_range > vcl_range_A(vcl_big_range_A, range1_A, range2_A); viennacl::copy(A, vcl_range_A); viennacl::matrix vcl_big_slice_A(4*matrix_size, 4*matrix_size); viennacl::matrix_slice > vcl_slice_A(vcl_big_slice_A, slice1_A, slice2_A); viennacl::copy(A, vcl_slice_A); // B viennacl::range range1_B(matrix_size, 2*matrix_size); viennacl::range range2_B(2*rhs_num, 3*rhs_num); viennacl::slice slice1_B(matrix_size, 2, matrix_size); viennacl::slice slice2_B(0, 3, rhs_num); viennacl::matrix vcl_B(matrix_size, rhs_num); viennacl::copy(B_start, vcl_B); viennacl::matrix vcl_big_range_B(4*matrix_size, 4*rhs_num); viennacl::matrix_range > vcl_range_B(vcl_big_range_B, range1_B, range2_B); 
viennacl::copy(B_start, vcl_range_B); viennacl::matrix vcl_big_slice_B(4*matrix_size, 4*rhs_num); viennacl::matrix_slice > vcl_slice_B(vcl_big_slice_B, slice1_B, slice2_B); viennacl::copy(B_start, vcl_slice_B); // C viennacl::range range1_C(rhs_num, 2*rhs_num); viennacl::range range2_C(2*matrix_size, 3*matrix_size); viennacl::slice slice1_C(rhs_num, 2, rhs_num); viennacl::slice slice2_C(0, 3, matrix_size); viennacl::matrix vcl_C(rhs_num, matrix_size); viennacl::copy(C_start, vcl_C); viennacl::matrix vcl_big_range_C(4*rhs_num, 4*matrix_size); viennacl::matrix_range > vcl_range_C(vcl_big_range_C, range1_C, range2_C); viennacl::copy(C_start, vcl_range_C); viennacl::matrix vcl_big_slice_C(4*rhs_num, 4*matrix_size); viennacl::matrix_slice > vcl_slice_C(vcl_big_slice_C, slice1_C, slice2_C); viennacl::copy(C_start, vcl_slice_C); std::cout << "Now using A=matrix, B=matrix" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_A, vcl_B, vcl_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=matrix, B=range" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_A, vcl_range_B, vcl_range_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=matrix, B=slice" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_A, vcl_slice_B, vcl_slice_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=range, B=matrix" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_range_A, vcl_B, vcl_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=range, B=range" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_range_A, vcl_range_B, vcl_range_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=range, B=slice" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_range_A, vcl_slice_B, vcl_slice_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=slice, B=matrix" << std::endl; ret = 
test_solve(epsilon, A, B_start, C_start, vcl_slice_A, vcl_B, vcl_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=slice, B=range" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_slice_A, vcl_range_B, vcl_range_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; std::cout << "Now using A=slice, B=slice" << std::endl; ret = test_solve(epsilon, A, B_start, C_start, vcl_slice_A, vcl_slice_B, vcl_slice_C, vcl_B ); if (ret != EXIT_SUCCESS) return ret; return ret; } // // Control functions // template< typename NumericT, typename Epsilon > int test(Epsilon const& epsilon) { int ret; std::cout << "////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=row ///" << std::endl; std::cout << "////////////////////////////////" << std::endl; ret = test_solve(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=col ///" << std::endl; std::cout << "////////////////////////////////" << std::endl; ret = test_solve(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=row ///" << std::endl; std::cout << "////////////////////////////////" << std::endl; ret = test_solve(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=col ///" << std::endl; std::cout << "////////////////////////////////" << std::endl; ret = test_solve(epsilon); if (ret != EXIT_SUCCESS) return ret; return ret; } ViennaCL-1.5.1-src/tests/src/scheduler_matrix_matrix.cpp000644 001750 001750 00000076513 12267307531 023365 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. 
Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ //#define NDEBUG //#define VIENNACL_DEBUG_BUILD // // *** System // #include // // *** Boost // #include #include #include #include #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL //#define VIENNACL_DEBUG_BUILD #define VIENNACL_WITH_UBLAS 1 #include "viennacl/scalar.hpp" #include "viennacl/matrix.hpp" #include "viennacl/matrix_proxy.hpp" #include "viennacl/vector.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "examples/tutorial/Random.hpp" #include "viennacl/scheduler/execute.hpp" #include "viennacl/scheduler/io.hpp" // // ------------------------------------------------------------- // using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType & s1, viennacl::scalar & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } template ScalarType diff(ublas::vector & v1, viennacl::vector & v2) { ublas::vector v2_cpu(v2.size()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin()); for (std::size_t i=0;i 0 ) v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); else v2_cpu[i] = 0.0; } return norm_inf(v2_cpu); } template ScalarType diff(ublas::matrix & mat1, VCLMatrixType & mat2) { ublas::matrix mat2_cpu(mat2.size1(), mat2.size2()); viennacl::backend::finish(); //workaround for a bug 
in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(mat2, mat2_cpu); ScalarType ret = 0; ScalarType act = 0; for (unsigned int i = 0; i < mat2_cpu.size1(); ++i) { for (unsigned int j = 0; j < mat2_cpu.size2(); ++j) { act = std::fabs(mat2_cpu(i,j) - mat1(i,j)) / std::max( std::fabs(mat2_cpu(i, j)), std::fabs(mat1(i,j)) ); if (act > ret) ret = act; } } //std::cout << ret << std::endl; return ret; } // // Part 1: Matrix-matrix multiplications // template< typename NumericT, typename Epsilon, typename ReferenceMatrixTypeA, typename ReferenceMatrixTypeB, typename ReferenceMatrixTypeC, typename MatrixTypeA, typename MatrixTypeB, typename MatrixTypeC> int test_prod(Epsilon const& epsilon, ReferenceMatrixTypeA const & A, ReferenceMatrixTypeA const & A_trans, ReferenceMatrixTypeB const & B, ReferenceMatrixTypeB const & B_trans, ReferenceMatrixTypeC & C, MatrixTypeA const & vcl_A, MatrixTypeA const & vcl_A_trans, MatrixTypeB const & vcl_B, MatrixTypeB const & vcl_B_trans, MatrixTypeC & vcl_C ) { int retval = EXIT_SUCCESS; NumericT act_diff = 0; // Test: C +-= A * B -------------------------------------------------------------------------- C = viennacl::linalg::prod(A, B); { viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), viennacl::linalg::prod(vcl_A, vcl_B)); viennacl::scheduler::execute(my_statement); } act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C = A * B passed!" 
<< std::endl; C += viennacl::linalg::prod(A, B); { viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_add(), viennacl::linalg::prod(vcl_A, vcl_B)); viennacl::scheduler::execute(my_statement); } act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C += A * B passed!" << std::endl; C -= viennacl::linalg::prod(A, B); { viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_sub(), viennacl::linalg::prod(vcl_A, vcl_B)); viennacl::scheduler::execute(my_statement); } act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C -= A * B passed!" << std::endl; // Test: C +-= A * trans(B) -------------------------------------------------------------------------- C = boost::numeric::ublas::prod(A, trans(B_trans)); { viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), viennacl::linalg::prod(vcl_A, trans(vcl_B_trans))); viennacl::scheduler::execute(my_statement); } act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C = A * trans(B) passed!" 
<< std::endl; C += boost::numeric::ublas::prod(A, trans(B_trans)); { viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_add(), viennacl::linalg::prod(vcl_A, trans(vcl_B_trans))); viennacl::scheduler::execute(my_statement); } act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C += A * trans(B) passed!" << std::endl; C -= boost::numeric::ublas::prod(A, trans(B_trans)); { viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_sub(), viennacl::linalg::prod(vcl_A, trans(vcl_B_trans))); viennacl::scheduler::execute(my_statement); } act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C -= A * trans(B) passed!" << std::endl; // Test: C +-= trans(A) * B -------------------------------------------------------------------------- C = boost::numeric::ublas::prod(trans(A_trans), B); { viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), viennacl::linalg::prod(trans(vcl_A_trans), vcl_B)); viennacl::scheduler::execute(my_statement); } act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C = trans(A) * B passed!" 
<< std::endl; C += boost::numeric::ublas::prod(trans(A_trans), B); { viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_add(), viennacl::linalg::prod(trans(vcl_A_trans), vcl_B)); viennacl::scheduler::execute(my_statement); } act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C += trans(A) * B passed!" << std::endl; C -= boost::numeric::ublas::prod(trans(A_trans), B); { viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_sub(), viennacl::linalg::prod(trans(vcl_A_trans), vcl_B)); viennacl::scheduler::execute(my_statement); } act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C -= trans(A) * B passed!" << std::endl; // Test: C +-= trans(A) * trans(B) -------------------------------------------------------------------------- C = boost::numeric::ublas::prod(trans(A_trans), trans(B_trans)); { viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_assign(), viennacl::linalg::prod(trans(vcl_A_trans), trans(vcl_B_trans))); viennacl::scheduler::execute(my_statement); } act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C = trans(A) * trans(B) passed!" 
<< std::endl; C += boost::numeric::ublas::prod(trans(A_trans), trans(B_trans)); { viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_add(), viennacl::linalg::prod(trans(vcl_A_trans), trans(vcl_B_trans))); viennacl::scheduler::execute(my_statement); } act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C += trans(A) * trans(B) passed!" << std::endl; C -= boost::numeric::ublas::prod(trans(A_trans), trans(B_trans)); { viennacl::scheduler::statement my_statement(vcl_C, viennacl::op_inplace_sub(), viennacl::linalg::prod(trans(vcl_A_trans), trans(vcl_B_trans))); viennacl::scheduler::execute(my_statement); } act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C -= trans(A) * trans(B) passed!" 
<< std::endl; return retval; } template< typename NumericT, typename F_A, typename F_B, typename F_C, typename Epsilon > int test_prod(Epsilon const& epsilon) { int ret; std::size_t matrix_size1 = 29; //some odd number, not too large std::size_t matrix_size2 = 47; //some odd number, not too large std::size_t matrix_size3 = 33; //some odd number, not too large //std::size_t matrix_size1 = 128; //some odd number, not too large //std::size_t matrix_size2 = 64; //some odd number, not too large //std::size_t matrix_size3 = 128; //some odd number, not too large //std::size_t matrix_size1 = 256; // for testing AMD kernels //std::size_t matrix_size2 = 256; // for testing AMD kernels //std::size_t matrix_size3 = 256; // for testing AMD kernels // -------------------------------------------------------------------------- // ublas reference: ublas::matrix A(matrix_size1, matrix_size2); ublas::matrix big_A = ublas::scalar_matrix(4*matrix_size1, 4*matrix_size2, NumericT(3.1415)); ublas::matrix B(matrix_size2, matrix_size3); ublas::matrix big_B = ublas::scalar_matrix(4*matrix_size2, 4*matrix_size3, NumericT(42.0)); ublas::matrix C(matrix_size1, matrix_size3); //fill A and B: for (unsigned int i = 0; i < A.size1(); ++i) for (unsigned int j = 0; j < A.size2(); ++j) A(i,j) = static_cast(0.1) * random(); for (unsigned int i = 0; i < B.size1(); ++i) for (unsigned int j = 0; j < B.size2(); ++j) B(i,j) = static_cast(0.1) * random(); ublas::matrix A_trans = trans(A); ublas::matrix big_A_trans = trans(big_A); ublas::matrix B_trans = trans(B); ublas::matrix big_B_trans = trans(big_B); // // ViennaCL objects // // A viennacl::range range1_A(matrix_size1, 2*matrix_size1); viennacl::range range2_A(matrix_size2, 2*matrix_size2); viennacl::slice slice1_A(matrix_size1, 2, matrix_size1); viennacl::slice slice2_A(matrix_size2, 3, matrix_size2); viennacl::matrix vcl_A(matrix_size1, matrix_size2); viennacl::copy(A, vcl_A); viennacl::matrix vcl_big_range_A(4*matrix_size1, 4*matrix_size2); 
viennacl::matrix_range > vcl_range_A(vcl_big_range_A, range1_A, range2_A); viennacl::copy(A, vcl_range_A); viennacl::matrix vcl_big_slice_A(4*matrix_size1, 4*matrix_size2); viennacl::matrix_slice > vcl_slice_A(vcl_big_slice_A, slice1_A, slice2_A); viennacl::copy(A, vcl_slice_A); // A^T viennacl::matrix vcl_A_trans(matrix_size2, matrix_size1); viennacl::copy(A_trans, vcl_A_trans); viennacl::matrix vcl_big_range_A_trans(4*matrix_size2, 4*matrix_size1); viennacl::matrix_range > vcl_range_A_trans(vcl_big_range_A_trans, range2_A, range1_A); viennacl::copy(A_trans, vcl_range_A_trans); viennacl::matrix vcl_big_slice_A_trans(4*matrix_size2, 4*matrix_size1); viennacl::matrix_slice > vcl_slice_A_trans(vcl_big_slice_A_trans, slice2_A, slice1_A); viennacl::copy(A_trans, vcl_slice_A_trans); // B viennacl::range range1_B(2*matrix_size2, 3*matrix_size2); viennacl::range range2_B(2*matrix_size3, 3*matrix_size3); viennacl::slice slice1_B(matrix_size2, 3, matrix_size2); viennacl::slice slice2_B(matrix_size3, 2, matrix_size3); viennacl::matrix vcl_B(matrix_size2, matrix_size3); viennacl::copy(B, vcl_B); viennacl::matrix vcl_big_range_B(4*matrix_size2, 4*matrix_size3); viennacl::matrix_range > vcl_range_B(vcl_big_range_B, range1_B, range2_B); viennacl::copy(B, vcl_range_B); viennacl::matrix vcl_big_slice_B(4*matrix_size2, 4*matrix_size3); viennacl::matrix_slice > vcl_slice_B(vcl_big_slice_B, slice1_B, slice2_B); viennacl::copy(B, vcl_slice_B); // B^T viennacl::matrix vcl_B_trans(matrix_size3, matrix_size2); viennacl::copy(B_trans, vcl_B_trans); viennacl::matrix vcl_big_range_B_trans(4*matrix_size3, 4*matrix_size2); viennacl::matrix_range > vcl_range_B_trans(vcl_big_range_B_trans, range2_B, range1_B); viennacl::copy(B_trans, vcl_range_B_trans); viennacl::matrix vcl_big_slice_B_trans(4*matrix_size3, 4*matrix_size2); viennacl::matrix_slice > vcl_slice_B_trans(vcl_big_slice_B_trans, slice2_B, slice1_B); viennacl::copy(B_trans, vcl_slice_B_trans); // C viennacl::range 
range1_C(matrix_size1-1, 2*matrix_size1-1); viennacl::range range2_C(matrix_size3-1, 2*matrix_size3-1); viennacl::slice slice1_C(matrix_size1-1, 3, matrix_size1); viennacl::slice slice2_C(matrix_size3-1, 3, matrix_size3); viennacl::matrix vcl_C(matrix_size1, matrix_size3); viennacl::matrix vcl_big_range_C(4*matrix_size1, 4*matrix_size3); viennacl::matrix_range > vcl_range_C(vcl_big_range_C, range1_C, range2_C); viennacl::matrix vcl_big_slice_C(4*matrix_size1, 4*matrix_size3); viennacl::matrix_slice > vcl_slice_C(vcl_big_slice_C, slice1_C, slice2_C); std::cout << "--- Part 1: Testing matrix-matrix products ---" << std::endl; ////// ////// A: matrix ////// // // std::cout << "Now using A=matrix, B=matrix, C=matrix" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_B, vcl_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=matrix, C=range" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_B, vcl_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=matrix, C=slice" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_B, vcl_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=range, C=matrix" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_range_B, vcl_range_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=range, C=range" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_range_B, vcl_range_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=range, C=slice" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_range_B, vcl_range_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout 
<< "Now using A=matrix, B=slice, C=matrix" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=slice, C=range" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=slice, C=slice" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; ////// ////// A: range ////// // // std::cout << "Now using A=range, B=matrix, C=matrix" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_B, vcl_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=matrix, C=range" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_B, vcl_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=matrix, C=slice" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_B, vcl_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=range, C=matrix" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_range_B, vcl_range_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=range, C=range" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_range_B, vcl_range_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=range, C=slice" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, 
vcl_range_B, vcl_range_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=slice, C=matrix" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=slice, C=range" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=slice, C=slice" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; ////// ////// A: slice ////// // // std::cout << "Now using A=slice, B=matrix, C=matrix" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_B, vcl_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=matrix, C=range" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_B, vcl_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=matrix, C=slice" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_B, vcl_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=range, C=matrix" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_range_B, vcl_range_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=range, C=range" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_range_B, vcl_range_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using 
A=slice, B=range, C=slice" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_range_B, vcl_range_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=slice, C=matrix" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=slice, C=range" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=slice, C=slice" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; return ret; } // // Control functions // template< typename NumericT, typename Epsilon > int test(Epsilon const& epsilon) { int ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=row, C=row ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=row, C=col ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=col, C=row ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=col, C=col ///" << std::endl; 
std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=row, C=row ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=row, C=col ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=col, C=row ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=col, C=col ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; return ret; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: BLAS 3 routines" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = NumericT(1.0E-3); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " 
<< epsilon << std::endl; std::cout << " numeric: float" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-11; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/Random.hpp000644 001750 001750 00000002327 12267307531 017654 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #ifndef _RANDOM_HPP_ #define _RANDOM_HPP_ #include #include inline void init() { static bool init = false; if (!init) { srand( (unsigned int)time(NULL) ); init = true; } } template TYPE random(); template<> double random() { init(); return static_cast(rand()) / static_cast(RAND_MAX); } template<> float random() { init(); return static_cast(random()); } #endif ViennaCL-1.5.1-src/tests/src/vector_float.cu000644 001750 001750 00000004116 12267307531 020741 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #include "vector_float_double.hpp" // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Vector" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = static_cast(1.0E-2); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/scalar.cpp000644 001750 001750 00000033456 12267307531 017703 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // *** System // #include #include #include // // *** ViennaCL // #include "viennacl/scalar.hpp" // // ------------------------------------------------------------- // template ScalarType diff(ScalarType & s1, viennacl::scalar & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } // // ------------------------------------------------------------- // template< typename NumericT, typename Epsilon > int test(Epsilon const& epsilon) { int retval = EXIT_SUCCESS; NumericT s1 = NumericT(3.1415926); NumericT s2 = NumericT(2.71763); NumericT s3 = NumericT(42); viennacl::scalar vcl_s1; viennacl::scalar vcl_s2; viennacl::scalar vcl_s3 = 1.0; vcl_s1 = s1; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: vcl_s1 = s1;" << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } vcl_s2 = s2; if( fabs(diff(s2, vcl_s2)) > epsilon ) { std::cout << "# Error at operation: vcl_s2 = s2;" << std::endl; std::cout << " diff: " << fabs(diff(s2, vcl_s2)) << std::endl; retval = EXIT_FAILURE; } vcl_s3 = s3; if( s3 != vcl_s3 ) { std::cout << "# Error at operation: vcl_s3 = s3;" << std::endl; std::cout << " diff: " << s3 - vcl_s3 << std::endl; retval = EXIT_FAILURE; } NumericT tmp = s2; s2 = s1; s1 = tmp; viennacl::linalg::swap(vcl_s1, vcl_s2); if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: swap " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 += s2; vcl_s1 += vcl_s2; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: += 
" << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 *= s3; vcl_s1 *= vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: *= " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 -= s2; vcl_s1 -= vcl_s2; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: -= " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 /= s3; vcl_s1 /= vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: /= " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 = vcl_s1; s1 = s2 + s3; vcl_s1 = vcl_s2 + vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 + s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 += s2 + s3; vcl_s1 += vcl_s2 + vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 += s2 + s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 -= s2 + s3; vcl_s1 -= vcl_s2 + vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 -= s2 + s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 = s2 - s3; vcl_s1 = vcl_s2 - vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 - s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 += s2 - s3; vcl_s1 += vcl_s2 - vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 += s2 - s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 -= s2 - s3; vcl_s1 -= vcl_s2 - vcl_s3; if( fabs(diff(s1, 
vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 -= s2 - s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 = s2 * s3; vcl_s1 = vcl_s2 * vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 * s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 += s2 * s3; vcl_s1 += vcl_s2 * vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 += s2 * s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 -= s2 * s3; vcl_s1 -= vcl_s2 * vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 -= s2 * s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 = s2 / s3; vcl_s1 = vcl_s2 / vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 / s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 += s2 / s3; vcl_s1 += vcl_s2 / vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 += s2 / s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 -= s2 / s3; vcl_s1 -= vcl_s2 / vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 -= s2 / s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } // addition with factors, = vcl_s1 = s1; s1 = s2 * s2 + s3 * s3; vcl_s1 = vcl_s2 * s2 + vcl_s3 * s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 * s2 + s3 * s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } vcl_s1 = vcl_s2 * vcl_s2 + vcl_s3 * vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# 
Error at operation: s1 = s2 * s2 + s3 * s3, second test " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 = s2 * s2 + s3 / s3; vcl_s1 = vcl_s2 * s2 + vcl_s3 / s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 * s2 + s3 / s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } vcl_s1 = vcl_s2 * vcl_s2 + vcl_s3 / vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 * s2 + s3 / s3, second test " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 = s2 / s2 + s3 * s3; vcl_s1 = vcl_s2 / s2 + vcl_s3 * s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 / s2 + s3 * s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } vcl_s1 = vcl_s2 / vcl_s2 + vcl_s3 * vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 / s2 + s3 * s3, second test " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 = s2 / s2 + s3 / s3; vcl_s1 = vcl_s2 / s2 + vcl_s3 / s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 / s2 + s3 / s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } vcl_s1 = vcl_s2 / vcl_s2 + vcl_s3 / vcl_s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 = s2 / s2 + s3 / s3, second test " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } // addition with factors, += vcl_s1 = s1; s1 += s2 * s2 + s3 * s3; vcl_s1 += vcl_s2 * s2 + vcl_s3 * s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 += s2 * s2 + s3 * s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, 
vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 += s2 * s2 + s3 / s3; vcl_s1 += vcl_s2 * s2 + vcl_s3 / s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 += s2 * s2 + s3 / s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 += s2 / s2 + s3 * s3; vcl_s1 += vcl_s2 / s2 + vcl_s3 * s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 += s2 / s2 + s3 * s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 += s2 / s2 + s3 / s3; vcl_s1 += vcl_s2 / s2 + vcl_s3 / s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 += s2 / s2 + s3 / s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } // addition with factors, -= vcl_s1 = s1; s1 -= s2 * s2 + s3 * s3; vcl_s1 -= vcl_s2 * s2 + vcl_s3 * s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 -= s2 * s2 + s3 * s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 -= s2 * s2 + s3 / s3; vcl_s1 -= vcl_s2 * s2 + vcl_s3 / s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 -= s2 * s2 + s3 / s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 -= s2 / s2 + s3 * s3; vcl_s1 -= vcl_s2 / s2 + vcl_s3 * s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 -= s2 / s2 + s3 * s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } s1 -= s2 / s2 + s3 / s3; vcl_s1 -= vcl_s2 / s2 + vcl_s3 / s3; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: s1 -= s2 / s2 + s3 / s3 " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } // lenghty expression: s1 = s2 + s3 * s2 
- s3 / s1; vcl_s1 = vcl_s2 + vcl_s3 * vcl_s2 - vcl_s3 / vcl_s1; if( fabs(diff(s1, vcl_s1)) > epsilon ) { std::cout << "# Error at operation: + * - / " << std::endl; std::cout << " diff: " << fabs(diff(s1, vcl_s1)) << std::endl; retval = EXIT_FAILURE; } return retval; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Scalar" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = NumericT(1.0E-5); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-10; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } // // ------------------------------------------------------------- // 
ViennaCL-1.5.1-src/tests/src/external_2.cu000644 001750 001750 00000004631 12267307531 020317 0ustar00rupprupp000000 000000
/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
   Institute for Analysis and Scientific Computing, TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

   -----------------
   ViennaCL - The Vienna Computing Library
   -----------------

   Project Head: Karl Rupp rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License: MIT (X11), see file LICENSE in the base directory
============================================================================= */

//
// A check for the absence of external linkage (otherwise, library is not truly 'header-only')
//
// This file includes the ViennaCL headers and defines other_func() but no
// main(); presumably it is linked together with a second translation unit
// that includes the same headers -- confirm against the test build script.

//#define VIENNACL_WITH_EIGEN
#define VIENNACL_WITH_UBLAS

//
// *** System
//
// NOTE(review): the header name after this #include is missing in this copy
// of the file (angle-bracket text appears to have been stripped). Restore it
// from the upstream ViennaCL 1.5.1 sources.
#include

//
// *** ViennaCL
//
#include "viennacl/scalar.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/matrix.hpp"
#include "viennacl/compressed_matrix.hpp"
#include "viennacl/coordinate_matrix.hpp"
#include "viennacl/ell_matrix.hpp"
#include "viennacl/hyb_matrix.hpp"
// The structured-matrix headers are only pulled in for OpenCL builds.
#ifdef VIENNACL_WITH_OPENCL
#include "viennacl/circulant_matrix.hpp"
#include "viennacl/hankel_matrix.hpp"
#include "viennacl/toeplitz_matrix.hpp"
#include "viennacl/vandermonde_matrix.hpp"
#endif
#include "viennacl/linalg/ilu.hpp"
#include "viennacl/linalg/row_scaling.hpp"
#include "viennacl/linalg/jacobi_precond.hpp"
#include "viennacl/linalg/cg.hpp"
#include "viennacl/linalg/bicgstab.hpp"
#include "viennacl/linalg/gmres.hpp"
#include "viennacl/linalg/direct_solve.hpp"
#include "viennacl/linalg/qr.hpp"
#include "viennacl/misc/bandwidth_reduction.hpp"
// AMG, SPAI, SVD, FFT and the kernel generator are likewise OpenCL-only here.
#ifdef VIENNACL_WITH_OPENCL
#include "viennacl/linalg/amg.hpp"
#include "viennacl/linalg/spai.hpp"
#include "viennacl/linalg/svd.hpp"
#include "viennacl/fft.hpp"
#include "viennacl/generator/generate.hpp"
#endif
#include "viennacl/io/matrix_market.hpp"
#include "viennacl/scheduler/execute.hpp"

// Creates one object of each basic ViennaCL container type (size 10 or
// 10 x 10) and discards them; nothing is computed or returned.
// NOTE(review): the typedef NumericType is never used in the text below, so
// the template arguments of the five declarations (e.g. <NumericType>) appear
// to have been lost in this copy -- restore from upstream.
void other_func()
{
  typedef float NumericType;

  //doing nothing but instantiating a few types
  viennacl::scalar s;
  viennacl::vector v(10);
  viennacl::matrix m(10, 10);
  viennacl::compressed_matrix compr(10, 10);
  viennacl::coordinate_matrix coord(10, 10);
}
ViennaCL-1.5.1-src/tests/src/matrix_row_int.cpp000644 001750 001750 00000003525 12267307531 021475 0ustar00rupprupp000000 000000
/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
   Institute for Analysis and Scientific Computing, TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

   -----------------
   ViennaCL - The Vienna Computing Library
   -----------------

   Project Head: Karl Rupp rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License: MIT (X11), see file LICENSE in the base directory
============================================================================= */

#include "matrix_int.hpp"

// Entry point of the row-major integer matrix test: prints a banner, calls
// run_test() once under the "int" heading and once under the "long" heading,
// and returns EXIT_FAILURE as soon as either call does not return
// EXIT_SUCCESS.
// NOTE(review): both run_test() calls are textually identical although the
// headings announce different element types; the explicit template arguments
// (layout / numeric type) appear to be missing in this copy -- restore them
// from upstream and matrix_int.hpp.
int main (int, const char **)
{
  std::cout << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << "## Test :: Matrix operations, row-major, integers " << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << std::endl;

  std::cout << "# Testing setup:" << std::endl;
  std::cout << " numeric: int" << std::endl;
  std::cout << " --- row-major ---" << std::endl;
  if (run_test() != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "# Testing setup:" << std::endl;
  std::cout << " numeric: long" << std::endl;
  std::cout << " --- row-major ---" << std::endl;
  if (run_test() != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << std::endl;
  std::cout << "------- Test completed --------" << std::endl;
  std::cout << std::endl;

  return EXIT_SUCCESS;
}
ViennaCL-1.5.1-src/tests/src/matrix_col_int.cu000644 001750 001750 00000003544 12267307531 021271 0ustar00rupprupp000000 000000
/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
   Institute for Analysis and Scientific Computing, TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

   -----------------
   ViennaCL - The Vienna Computing Library
   -----------------

   Project Head: Karl Rupp rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License: MIT (X11), see file LICENSE in the base directory
============================================================================= */

#include "matrix_int.hpp"

// Entry point of the column-major integer matrix test: same structure as the
// row-major variant, with "column-major" in the printed headings.
// NOTE(review): as in the row-major file, the two run_test() calls carry no
// template arguments in this copy -- restore them from upstream.
int main (int, const char **)
{
  std::cout << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << "## Test :: Matrix operations, column-major, integers " << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << std::endl;

  std::cout << "# Testing setup:" << std::endl;
  std::cout << " numeric: int" << std::endl;
  std::cout << " --- column-major ---" << std::endl;
  if (run_test() != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << "# Testing setup:" << std::endl;
  std::cout << " numeric: long" << std::endl;
  std::cout << " --- column-major ---" << std::endl;
  if (run_test() != EXIT_SUCCESS)
    return EXIT_FAILURE;

  std::cout << std::endl;
  std::cout << "------- Test completed --------" << std::endl;
  std::cout << std::endl;

  return EXIT_SUCCESS;
}
ViennaCL-1.5.1-src/tests/src/vector_multi_inner_prod.cu000644 001750 001750 00000055633 12267307531 023217 0ustar00rupprupp000000 000000
/* =========================================================================
   Copyright (c) 2010-2014, Institute for
Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // *** System // #include #include // // *** Boost // #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL #define VIENNACL_WITH_UBLAS 1 #include "viennacl/vector.hpp" #include "viennacl/vector_proxy.hpp" #include "viennacl/linalg/inner_prod.hpp" #include "viennacl/linalg/norm_1.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/norm_inf.hpp" #include "Random.hpp" using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType const & s1, ScalarType const & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } // // ------------------------------------------------------------- // template ScalarType diff(ScalarType const & s1, viennacl::scalar const & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } // // ------------------------------------------------------------- // template ScalarType diff(ScalarType const & s1, viennacl::entry_proxy const & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } // // ------------------------------------------------------------- // template ScalarType diff(ublas::vector const & v1, ViennaCLVectorType const & vcl_vec) { ublas::vector v2_cpu(vcl_vec.size()); viennacl::backend::finish(); viennacl::copy(vcl_vec, v2_cpu); for (unsigned int i=0;i 0 ) 
v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); else v2_cpu[i] = 0.0; } return ublas::norm_inf(v2_cpu); } template ScalarType diff(ublas::vector_slice > const & v1, ViennaCLVectorType const & vcl_vec) { ublas::vector v2_cpu(vcl_vec.size()); viennacl::backend::finish(); viennacl::copy(vcl_vec, v2_cpu); for (unsigned int i=0;i 0 ) v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ); else v2_cpu[i] = 0.0; } return ublas::norm_inf(v2_cpu); } template int check(T1 const & t1, T2 const & t2, double epsilon) { int retval = EXIT_SUCCESS; double temp = std::fabs(diff(t1, t2)); if (temp > epsilon) { std::cout << "# Error! Relative difference: " << temp << std::endl; retval = EXIT_FAILURE; } return retval; } // // ------------------------------------------------------------- // template< typename NumericT, typename Epsilon, typename UblasVectorType1, typename UblasVectorType2, typename UblasVectorType3, typename UblasVectorType4, typename ViennaCLVectorType1, typename ViennaCLVectorType2, typename ViennaCLVectorType3, typename ViennaCLVectorType4 > int test(Epsilon const& epsilon, UblasVectorType1 & ublas_v1, UblasVectorType2 & ublas_v2, UblasVectorType3 & ublas_v3, UblasVectorType4 & ublas_v4, ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2, ViennaCLVectorType3 & vcl_v3, ViennaCLVectorType4 & vcl_v4) { int retval = EXIT_SUCCESS; for (std::size_t i=0; i(); ublas_v2[i] = NumericT(1.0) + random(); ublas_v3[i] = NumericT(1.0) + random(); ublas_v4[i] = NumericT(1.0) + random(); } viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); //resync viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); viennacl::copy(ublas_v3.begin(), ublas_v3.end(), vcl_v3.begin()); viennacl::copy(ublas_v4.begin(), ublas_v4.end(), vcl_v4.begin()); std::cout << "Checking for successful copy..." 
<< std::endl; if (check(ublas_v1, vcl_v1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_v2, vcl_v2, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_v3, vcl_v3, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_v4, vcl_v4, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; ublas::vector ref_result = ublas::scalar_vector(40, 0.0); viennacl::vector result = viennacl::scalar_vector(40, 0.0); std::cout << "Testing inner_prod with two vectors..." << std::endl; ref_result(2) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(5) = ublas::inner_prod(ublas_v1, ublas_v2); viennacl::project(result, viennacl::slice(2, 3, 2)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v1, vcl_v2)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } ref_result(3) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(7) = ublas::inner_prod(ublas_v1, ublas_v4); viennacl::project(result, viennacl::slice(3, 4, 2)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v3, vcl_v4)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } std::cout << "Testing inner_prod with three vectors..." 
<< std::endl; ref_result(1) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(3) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(5) = ublas::inner_prod(ublas_v1, ublas_v3); viennacl::project(result, viennacl::slice(1, 2, 3)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v1, vcl_v2, vcl_v3)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } ref_result(2) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(6) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(10) = ublas::inner_prod(ublas_v1, ublas_v4); viennacl::project(result, viennacl::slice(2, 4, 3)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v3, vcl_v2, vcl_v4)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } std::cout << "Testing inner_prod with four vectors..." << std::endl; ref_result(4) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(5) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(6) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(7) = ublas::inner_prod(ublas_v1, ublas_v4); viennacl::project(result, viennacl::slice(4, 1, 4)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v1, vcl_v2, vcl_v3, vcl_v4)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } ref_result(3) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(6) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(9) = ublas::inner_prod(ublas_v1, ublas_v4); ref_result(12) = ublas::inner_prod(ublas_v1, ublas_v1); viennacl::project(result, viennacl::slice(3, 3, 4)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v3, vcl_v2, vcl_v4, vcl_v1)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return 
EXIT_FAILURE; } std::cout << "Testing inner_prod with five vectors..." << std::endl; ref_result(1) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(3) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(5) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(7) = ublas::inner_prod(ublas_v1, ublas_v4); ref_result(9) = ublas::inner_prod(ublas_v1, ublas_v2); viennacl::project(result, viennacl::slice(1, 2, 5)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v1, vcl_v2, vcl_v3, vcl_v4, vcl_v2)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } ref_result(2) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(4) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(6) = ublas::inner_prod(ublas_v1, ublas_v4); ref_result(8) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(10) = ublas::inner_prod(ublas_v1, ublas_v2); viennacl::project(result, viennacl::slice(2, 2, 5)) = viennacl::linalg::inner_prod(vcl_v1, viennacl::tie(vcl_v3, vcl_v2, vcl_v4, vcl_v1, vcl_v2)); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } std::cout << "Testing inner_prod with eight vectors..." 
<< std::endl; ref_result(1) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(5) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(9) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(13) = ublas::inner_prod(ublas_v1, ublas_v4); ref_result(17) = ublas::inner_prod(ublas_v1, ublas_v3); ref_result(21) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(25) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(29) = ublas::inner_prod(ublas_v1, ublas_v2); std::vector const *> vecs1(8); vecs1[0] = &vcl_v1; vecs1[1] = &vcl_v2; vecs1[2] = &vcl_v3; vecs1[3] = &vcl_v4; vecs1[4] = &vcl_v3; vecs1[5] = &vcl_v2; vecs1[6] = &vcl_v1; vecs1[7] = &vcl_v2; viennacl::vector_tuple tuple1(vecs1); viennacl::project(result, viennacl::slice(1, 4, 8)) = viennacl::linalg::inner_prod(vcl_v1, tuple1); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } ref_result(3) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(5) = ublas::inner_prod(ublas_v1, ublas_v4); ref_result(7) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(9) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(11) = ublas::inner_prod(ublas_v1, ublas_v2); ref_result(13) = ublas::inner_prod(ublas_v1, ublas_v1); ref_result(15) = ublas::inner_prod(ublas_v1, ublas_v4); ref_result(17) = ublas::inner_prod(ublas_v1, ublas_v2); std::vector const *> vecs2(8); vecs2[0] = &vcl_v2; vecs2[1] = &vcl_v4; vecs2[2] = &vcl_v1; vecs2[3] = &vcl_v2; vecs2[4] = &vcl_v2; vecs2[5] = &vcl_v1; vecs2[6] = &vcl_v4; vecs2[7] = &vcl_v2; viennacl::vector_tuple tuple2(vecs2); viennacl::project(result, viennacl::slice(3, 2, 8)) = viennacl::linalg::inner_prod(vcl_v1, tuple2); if (check(ref_result, result, epsilon) != EXIT_SUCCESS) { std::cout << ref_result << std::endl; std::cout << result << std::endl; return EXIT_FAILURE; } // -------------------------------------------------------------------------- return retval; } template< typename NumericT, typename 
Epsilon > int test(Epsilon const& epsilon) { int retval = EXIT_SUCCESS; std::size_t size = 8 * 1337; std::cout << "Running tests for vector of size " << size << std::endl; // // Set up UBLAS objects // ublas::vector ublas_full_vec1(size); ublas::vector ublas_full_vec2(ublas_full_vec1.size()); for (std::size_t i=0; i(); ublas_full_vec2[i] = NumericT(1.0) + random(); } ublas::slice s1( ublas_full_vec1.size() / 8, 3, ublas_full_vec1.size() / 8); ublas::slice s2(2 * ublas_full_vec2.size() / 8, 1, ublas_full_vec2.size() / 8); ublas::slice s3(4 * ublas_full_vec1.size() / 8, 2, ublas_full_vec1.size() / 8); ublas::slice s4(3 * ublas_full_vec2.size() / 8, 4, ublas_full_vec2.size() / 8); ublas::vector_slice< ublas::vector > ublas_slice_vec1(ublas_full_vec1, s1); ublas::vector_slice< ublas::vector > ublas_slice_vec2(ublas_full_vec2, s2); ublas::vector_slice< ublas::vector > ublas_slice_vec3(ublas_full_vec1, s3); ublas::vector_slice< ublas::vector > ublas_slice_vec4(ublas_full_vec2, s4); // // Set up ViennaCL objects // viennacl::vector vcl_full_vec1(ublas_full_vec1.size()); viennacl::vector vcl_full_vec2(ublas_full_vec2.size()); viennacl::fast_copy(ublas_full_vec1.begin(), ublas_full_vec1.end(), vcl_full_vec1.begin()); viennacl::copy (ublas_full_vec2.begin(), ublas_full_vec2.end(), vcl_full_vec2.begin()); viennacl::slice vcl_s1( vcl_full_vec1.size() / 8, 3, vcl_full_vec1.size() / 8); viennacl::slice vcl_s2(2 * vcl_full_vec2.size() / 8, 1, vcl_full_vec2.size() / 8); viennacl::slice vcl_s3(4 * vcl_full_vec1.size() / 8, 2, vcl_full_vec1.size() / 8); viennacl::slice vcl_s4(3 * vcl_full_vec2.size() / 8, 4, vcl_full_vec2.size() / 8); viennacl::vector_slice< viennacl::vector > vcl_slice_vec1(vcl_full_vec1, vcl_s1); viennacl::vector_slice< viennacl::vector > vcl_slice_vec2(vcl_full_vec2, vcl_s2); viennacl::vector_slice< viennacl::vector > vcl_slice_vec3(vcl_full_vec1, vcl_s3); viennacl::vector_slice< viennacl::vector > vcl_slice_vec4(vcl_full_vec2, vcl_s4); viennacl::vector 
vcl_short_vec1(vcl_slice_vec1); viennacl::vector vcl_short_vec2 = vcl_slice_vec2; viennacl::vector vcl_short_vec3 = vcl_slice_vec2 + vcl_slice_vec1; viennacl::vector vcl_short_vec4 = vcl_short_vec1 + vcl_slice_vec2; ublas::vector ublas_short_vec1(ublas_slice_vec1); ublas::vector ublas_short_vec2(ublas_slice_vec2); ublas::vector ublas_short_vec3 = ublas_slice_vec2 + ublas_slice_vec1; ublas::vector ublas_short_vec4 = ublas_short_vec1 + ublas_slice_vec2; std::cout << "Testing creation of vectors from slice..." << std::endl; if (check(ublas_short_vec1, vcl_short_vec1, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_short_vec2, vcl_short_vec2, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_short_vec3, vcl_short_vec3, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_short_vec4, vcl_short_vec4, epsilon) != EXIT_SUCCESS) return EXIT_FAILURE; // // Now start running tests for vectors, ranges and slices: // std::cout << " ** [vector|vector|vector|vector] **" << std::endl; retval = test(epsilon, ublas_short_vec1, ublas_short_vec2, ublas_short_vec2, ublas_short_vec2, vcl_short_vec1, vcl_short_vec2, vcl_short_vec3, vcl_short_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [vector|vector|vector|slice] **" << std::endl; retval = test(epsilon, ublas_short_vec1, ublas_short_vec2, ublas_short_vec2, ublas_slice_vec2, vcl_short_vec1, vcl_short_vec2, vcl_short_vec3, vcl_slice_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [vector|vector|slice|vector] **" << std::endl; retval = test(epsilon, ublas_short_vec1, ublas_short_vec2, ublas_slice_vec2, ublas_short_vec2, vcl_short_vec1, vcl_short_vec2, vcl_slice_vec3, vcl_short_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [vector|vector|slice|slice] **" << std::endl; retval = test(epsilon, ublas_short_vec1, ublas_short_vec2, ublas_slice_vec2, ublas_slice_vec2, vcl_short_vec1, vcl_short_vec2, vcl_slice_vec3, 
vcl_slice_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [vector|slice|vector|vector] **" << std::endl; retval = test(epsilon, ublas_short_vec1, ublas_slice_vec2, ublas_short_vec2, ublas_short_vec2, vcl_short_vec1, vcl_slice_vec2, vcl_short_vec3, vcl_short_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [vector|slice|vector|slice] **" << std::endl; retval = test(epsilon, ublas_short_vec1, ublas_slice_vec2, ublas_short_vec2, ublas_slice_vec2, vcl_short_vec1, vcl_slice_vec2, vcl_short_vec3, vcl_slice_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [vector|slice|slice|vector] **" << std::endl; retval = test(epsilon, ublas_short_vec1, ublas_slice_vec2, ublas_slice_vec2, ublas_short_vec2, vcl_short_vec1, vcl_slice_vec2, vcl_slice_vec3, vcl_short_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [vector|slice|slice|slice] **" << std::endl; retval = test(epsilon, ublas_short_vec1, ublas_slice_vec2, ublas_slice_vec2, ublas_slice_vec2, vcl_short_vec1, vcl_slice_vec2, vcl_slice_vec3, vcl_slice_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; ////////////////// std::cout << " ** [slice|vector|vector|vector] **" << std::endl; retval = test(epsilon, ublas_slice_vec1, ublas_short_vec2, ublas_short_vec2, ublas_short_vec2, vcl_slice_vec1, vcl_short_vec2, vcl_short_vec3, vcl_short_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [slice|vector|vector|slice] **" << std::endl; retval = test(epsilon, ublas_slice_vec1, ublas_short_vec2, ublas_short_vec2, ublas_slice_vec2, vcl_slice_vec1, vcl_short_vec2, vcl_short_vec3, vcl_slice_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [slice|vector|slice|vector] **" << std::endl; retval = test(epsilon, ublas_slice_vec1, ublas_short_vec2, ublas_slice_vec2, ublas_short_vec2, vcl_slice_vec1, vcl_short_vec2, vcl_slice_vec3, vcl_short_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; 
std::cout << " ** [slice|vector|slice|slice] **" << std::endl; retval = test(epsilon, ublas_slice_vec1, ublas_short_vec2, ublas_slice_vec2, ublas_slice_vec2, vcl_slice_vec1, vcl_short_vec2, vcl_slice_vec3, vcl_slice_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [slice|slice|vector|vector] **" << std::endl; retval = test(epsilon, ublas_slice_vec1, ublas_slice_vec2, ublas_short_vec2, ublas_short_vec2, vcl_slice_vec1, vcl_slice_vec2, vcl_short_vec3, vcl_short_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [slice|slice|vector|slice] **" << std::endl; retval = test(epsilon, ublas_slice_vec1, ublas_slice_vec2, ublas_short_vec2, ublas_slice_vec2, vcl_slice_vec1, vcl_slice_vec2, vcl_short_vec3, vcl_slice_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [slice|slice|slice|vector] **" << std::endl; retval = test(epsilon, ublas_slice_vec1, ublas_slice_vec2, ublas_slice_vec2, ublas_short_vec2, vcl_slice_vec1, vcl_slice_vec2, vcl_slice_vec3, vcl_short_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** [slice|slice|slice|slice] **" << std::endl; retval = test(epsilon, ublas_slice_vec1, ublas_slice_vec2, ublas_slice_vec2, ublas_slice_vec2, vcl_slice_vec1, vcl_slice_vec2, vcl_slice_vec3, vcl_slice_vec4); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; return EXIT_SUCCESS; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Vector multiple inner products" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << 
"----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = static_cast(1.0E-4); std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-12; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/tests/src/matrix_row_double.cpp000644 001750 001750 00000003440 12267307531 022151 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library -----------------
   Project Head: Karl Rupp rupp@iue.tuwien.ac.at
   (A list of authors and contributors can be found in the PDF manual)
   License: MIT (X11), see file LICENSE in the base directory
============================================================================= */

#include "matrix_float_double.hpp"

//
// Entry point of the row-major, double-precision matrix test.
// Prints a banner, then calls run_test() with a tolerance of 1e-12 and returns
// EXIT_FAILURE if it does not report EXIT_SUCCESS. When built with OpenCL, the
// run is skipped (and EXIT_SUCCESS returned) if the current device lacks double support.
//
int main (int, const char **)
{
  std::cout << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << "## Test :: Matrix operations, row-major, double precision " << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << "----------------------------------------------" << std::endl;
  std::cout << std::endl;

  // The 'if' only exists in OpenCL builds; otherwise the block runs unconditionally
#ifdef VIENNACL_WITH_OPENCL
  if( viennacl::ocl::current_device().double_support() )
#endif
  {
    double epsilon = 1e-12;
    std::cout << "# Testing setup:" << std::endl;
    std::cout << " eps: " << epsilon << std::endl;
    std::cout << " numeric: double" << std::endl;

    // NOTE(review): run_test is declared in matrix_float_double.hpp (not visible here).
    //               The call presumably carried explicit template arguments (layout and
    //               numeric type) that are missing in this copy - confirm against the header.
    if (run_test(epsilon) != EXIT_SUCCESS)
      return EXIT_FAILURE;
  }

  std::cout << std::endl;
  std::cout << "------- Test completed --------" << std::endl;
  std::cout << std::endl;

  return EXIT_SUCCESS;
}
ViennaCL-1.5.1-src/tests/src/generator_blas3.cpp000644 001750 001750 00000035752 12267307531 021511 0ustar00rupprupp000000 000000 /* =========================================================================
   Copyright (c) 2010-2012, Institute for Microelectronics,
   Institute for Analysis and Scientific Computing, TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ #ifndef NDEBUG #define NDEBUG #endif // // *** System // #include // // *** Boost // #include #include #include #include #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL //#define VIENNACL_DEBUG_BUILD #define VIENNACL_HAVE_UBLAS 1 #include "viennacl/scalar.hpp" #include "viennacl/matrix.hpp" #include "viennacl/matrix_proxy.hpp" #include "viennacl/vector.hpp" #include "viennacl/linalg/prod.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/direct_solve.hpp" #include "examples/tutorial/Random.hpp" #include "viennacl/generator/generate.hpp" #include "list" // // ------------------------------------------------------------- // using namespace boost::numeric; // // ------------------------------------------------------------- // static const unsigned int min_large_block_size = 32; static const unsigned int max_large_block_size = 128; static const unsigned int n_large_blocks = static_cast(std::log(static_cast(max_large_block_size/min_large_block_size))/std::log(2.0)+1.0); static const unsigned int min_alignment = 1; static const unsigned int max_alignment = 8; static const unsigned int max_small_block_size = max_alignment; // // ------------------------------------------------------------- template ScalarType diff(ScalarType & s1, viennacl::scalar & s2) { viennacl::backend::finish(); if (s1 != s2) return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2)); return 0; } template ScalarType diff(ublas::matrix & mat1, VCLMatrixType & mat2) { ublas::matrix mat2_cpu(mat2.size1(), mat2.size2()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) 
viennacl::copy(mat2, mat2_cpu); ScalarType ret = 0; ScalarType act = 0; for (unsigned int i = 0; i < mat2_cpu.size1(); ++i) { for (unsigned int j = 0; j < mat2_cpu.size2(); ++j) { act = std::fabs(mat2_cpu(i,j) - mat1(i,j)) / std::max( std::fabs(mat2_cpu(i, j)), std::fabs(mat1(i,j)) ); if (act > ret) ret = act; } } //std::cout << ret << std::endl; return ret; } // // Part 1: Matrix-matrix multiplications // template< typename NumericT, typename Epsilon, typename ReferenceMatrixTypeA, typename ReferenceMatrixTypeB, typename ReferenceMatrixTypeC, typename MatrixTypeA, typename MatrixTypeB, typename MatrixTypeC> int test_prod(Epsilon const& epsilon, ReferenceMatrixTypeA const & A, ReferenceMatrixTypeA const & A_trans, ReferenceMatrixTypeB const & B, ReferenceMatrixTypeB const & B_trans, ReferenceMatrixTypeC & C, MatrixTypeA const & vcl_A, MatrixTypeA const & vcl_A_trans, MatrixTypeB const & vcl_B, MatrixTypeB const & vcl_B_trans, MatrixTypeC & vcl_C ) { int retval = EXIT_SUCCESS; NumericT act_diff = 0; NumericT alpha = NumericT(3.14); NumericT beta = NumericT(4.51); std::cout << "Testing C = alpha*prod(A,B) + beta*C ..." << std::endl; { C = alpha*viennacl::linalg::prod(A, B) + beta*C; viennacl::scheduler::statement statement(vcl_C, viennacl::op_assign(), alpha*viennacl::linalg::prod(vcl_A,vcl_B)+beta*vcl_C); viennacl::generator::generate_enqueue_statement(statement, statement.array()[0]); viennacl::backend::finish(); act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C = A * B passed!" << std::endl; } std::cout << "Testing C = alpha*trans(A) * B + beta*C ..." 
<< std::endl; { C = alpha*boost::numeric::ublas::prod(trans(A_trans), B) + beta*C; viennacl::scheduler::statement statement(vcl_C, viennacl::op_assign(), alpha*viennacl::linalg::prod(trans(vcl_A_trans),vcl_B) + beta*vcl_C); viennacl::generator::generate_enqueue_statement(statement, statement.array()[0]); viennacl::backend::finish(); act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C = trans(A) * B passed!" << std::endl; } std::cout << "Testing C = alpha*A * trans(B) + beta*C ..." << std::endl; { C = boost::numeric::ublas::prod(A,trans(B_trans)) + beta*C; viennacl::scheduler::statement statement(vcl_C, viennacl::op_assign(), viennacl::linalg::prod(vcl_A,trans(vcl_B_trans)) + beta*vcl_C); viennacl::generator::generate_enqueue_statement(statement, statement.array()[0]); viennacl::backend::finish(); act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C = A * trans(B) passed!" << std::endl; } std::cout << "Testing C = alpha*trans(A) * trans(B) + beta*C ..." << std::endl; { C = boost::numeric::ublas::prod(trans(A_trans), trans(B_trans)) + beta*C; viennacl::scheduler::statement statement(vcl_C, viennacl::op_assign(), viennacl::linalg::prod(trans(vcl_A_trans),trans(vcl_B_trans)) + beta*vcl_C); viennacl::generator::generate_enqueue_statement(statement, statement.array()[0]); viennacl::backend::finish(); act_diff = std::fabs(diff(C, vcl_C)); if( act_diff > epsilon ) { std::cout << "# Error at operation: matrix-matrix product" << std::endl; std::cout << " diff: " << act_diff << std::endl; retval = EXIT_FAILURE; } else std::cout << "Test C = trans(A) * trans(B) passed!" 
<< std::endl; } return retval; } template< typename NumericT, typename F_A, typename F_B, typename F_C, typename Epsilon> int test_prod(Epsilon const& epsilon) { int ret; long matrix_size1 = 2*max_large_block_size; long matrix_size2 = 3*max_large_block_size; long matrix_size3 = 4*max_large_block_size; // -------------------------------------------------------------------------- // ublas reference: ublas::matrix A(matrix_size1, matrix_size2); ublas::matrix B(matrix_size2, matrix_size3); ublas::matrix C(matrix_size1, matrix_size3); //fill A and B: for (unsigned int i = 0; i < A.size1(); ++i) for (unsigned int j = 0; j < A.size2(); ++j) A(i,j) = static_cast(0.1) * random(); for (unsigned int i = 0; i < B.size1(); ++i) for (unsigned int j = 0; j < B.size2(); ++j) B(i,j) = static_cast(0.1) * random(); for (unsigned int i = 0; i < C.size1(); ++i) for (unsigned int j = 0; j < C.size2(); ++j) C(i,j) = static_cast(0.1) * random(); ublas::matrix A_trans = trans(A); ublas::matrix B_trans = trans(B); // // ViennaCL objects // // A viennacl::matrix vcl_A(matrix_size1, matrix_size2); viennacl::copy(A, vcl_A); // A^T viennacl::matrix vcl_A_trans(matrix_size2, matrix_size1); viennacl::copy(A_trans, vcl_A_trans); // B viennacl::matrix vcl_B(matrix_size2, matrix_size3); viennacl::copy(B, vcl_B); // B^T viennacl::matrix vcl_B_trans(matrix_size3, matrix_size2); viennacl::copy(B_trans, vcl_B_trans); // C viennacl::matrix vcl_C(matrix_size1, matrix_size3); viennacl::copy(C, vcl_C); std::cout << "--- Part 1: Testing matrix-matrix products ---" << std::endl; ////// ////// A: matrix ////// // // std::cout << "Now using A=matrix, B=matrix, C=matrix" << std::endl; ret = test_prod(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_B, vcl_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; return EXIT_SUCCESS; } template< typename NumericT, typename Epsilon > int test(Epsilon const& epsilon) { int ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << 
"/// Now testing A=row, B=row, C=row ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=row, C=row ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=col, C=row ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=col, C=row ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=row, C=col ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=row, C=col ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=row, B=col, C=col ///" << std::endl; std::cout << "///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; std::cout << "///////////////////////////////////////" << std::endl; std::cout << "/// Now testing A=col, B=col, C=col ///" << std::endl; std::cout << 
"///////////////////////////////////////" << std::endl; ret = test_prod(epsilon); if (ret != EXIT_SUCCESS) return ret; return ret; } int main(int argc, char* argv[]) { std::vector args(argv,argv+argc); int retval = EXIT_SUCCESS; typedef std::vector devices_type; //platforms_type platforms = viennacl::ocl::get_platforms(); //size_t num_platforms = platforms.size(); devices_type dev = viennacl::ocl::current_context().devices(); for(devices_type::iterator it = dev.begin() ; it != dev.end() ; ++it){ std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Generated BLAS 3 routines" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; //srand(time(NULL)); std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { typedef float NumericT; NumericT epsilon = NumericT(1.0E-3); std::cout << "# Testing setup:" << std::endl; std::cout << viennacl::ocl::current_device().info() << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: float" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; #ifdef VIENNACL_WITH_OPENCL if( viennacl::ocl::current_device().double_support() ) #endif { { typedef double NumericT; NumericT epsilon = 1.0E-11; std::cout << "# Testing setup:" << std::endl; std::cout << " eps: " << epsilon << std::endl; std::cout << " numeric: double" << std::endl; retval = test(epsilon); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return 
retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; } std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; } return retval; } ViennaCL-1.5.1-src/tests/src/vector_uint.cpp000644 001750 001750 00000100320 12267307531 020760 0ustar00rupprupp000000 000000 /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ // // *** System // #include #include // // *** Boost // #include #include #include // // *** ViennaCL // //#define VIENNACL_DEBUG_ALL #define VIENNACL_WITH_UBLAS 1 #include "viennacl/vector.hpp" #include "viennacl/vector_proxy.hpp" #include "viennacl/linalg/inner_prod.hpp" #include "viennacl/linalg/norm_1.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/norm_inf.hpp" #include "Random.hpp" using namespace boost::numeric; // // ------------------------------------------------------------- // template ScalarType diff(ScalarType const & s1, ScalarType const & s2) { viennacl::backend::finish(); return s1 - s2; } // // ------------------------------------------------------------- // template ScalarType diff(ScalarType const & s1, viennacl::scalar const & s2) { viennacl::backend::finish(); return s1 - s2; } // // ------------------------------------------------------------- // template ScalarType diff(ScalarType const & s1, viennacl::entry_proxy const & s2) { viennacl::backend::finish(); 
return s1 - s2; } // // ------------------------------------------------------------- // template ScalarType diff(ublas::vector const & v1, VCLVectorType const & v2) { ublas::vector v2_cpu(v2.size()); viennacl::backend::finish(); //workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8) viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin()); for (unsigned int i=0;i int check(T1 const & t1, T2 const & t2) { int retval = EXIT_SUCCESS; if (diff(t1, t2) != 0) { std::cout << "# Error! Difference: " << diff(t1, t2) << std::endl; retval = EXIT_FAILURE; } return retval; } // // ------------------------------------------------------------- // template< typename NumericT, typename UblasVectorType, typename ViennaCLVectorType1, typename ViennaCLVectorType2 > int test(UblasVectorType & ublas_v1, UblasVectorType & ublas_v2, ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2) { int retval = EXIT_SUCCESS; NumericT cpu_result = 42; viennacl::scalar gpu_result = 43; // // Initializer: // std::cout << "Checking for zero_vector initializer..." << std::endl; //ublas_v1 = ublas::zero_vector(ublas_v1.size()); for (std::size_t i=0; i(vcl_v1.size()); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Checking for scalar_vector initializer..." << std::endl; //ublas_v1 = ublas::scalar_vector(ublas_v1.size(), cpu_result); for (std::size_t i=0; i(vcl_v1.size(), cpu_result); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; //ublas_v1 = ublas::scalar_vector(ublas_v1.size(), gpu_result); for (std::size_t i=0; i(vcl_v1.size(), gpu_result); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Checking for unit_vector initializer..." 
<< std::endl; //ublas_v1 = ublas::unit_vector(ublas_v1.size(), 5); for (std::size_t i=0; i(vcl_v1.size(), 5); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; for (std::size_t i=0; i cpu_result) cpu_result = ublas_v1[i]; gpu_result = viennacl::linalg::norm_inf(vcl_v1); if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_result2 = 0; for (std::size_t i=0; i cpu_result2) cpu_result2 = ublas_v1[i]; cpu_result = viennacl::linalg::norm_inf(vcl_v1); if (check(cpu_result, cpu_result2) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_result2 = 0; for (std::size_t i=0; i cpu_result2) cpu_result2 = ublas_v1[i] + ublas_v2[i]; cpu_result = viennacl::linalg::norm_inf(vcl_v1 + vcl_v2); if (check(cpu_result, cpu_result2) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing index_norm_inf..." << std::endl; std::size_t cpu_index = 0; cpu_result = 0; for (std::size_t i=0; i cpu_result) { cpu_result = ublas_v1[i]; cpu_index = i; } std::size_t gpu_index = viennacl::linalg::index_norm_inf(vcl_v1); if (check(static_cast(cpu_index), static_cast(gpu_index)) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1)]; if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; cpu_index = 0; cpu_result = 0; for (std::size_t i=0; i cpu_result) { cpu_result = ublas_v1[i]; cpu_index = i; } gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1 + vcl_v2)]; if (check(cpu_result, gpu_result) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- std::cout << "Testing assignments..." 
<< std::endl; NumericT val = static_cast(1); for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = val; for (size_t i=0; i < vcl_v1.size(); ++i) vcl_v1(i) = val; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // multiplication and division of vectors by scalars // std::cout << "Testing scaling with CPU scalar..." << std::endl; NumericT alpha = static_cast(3); viennacl::scalar gpu_alpha = alpha; ublas_v1 *= alpha; vcl_v1 *= alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing scaling with GPU scalar..." << std::endl; ublas_v1 *= alpha; vcl_v1 *= gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; NumericT beta = static_cast(2); viennacl::scalar gpu_beta = beta; std::cout << "Testing shrinking with CPU scalar..." << std::endl; ublas_v1 /= beta; vcl_v1 /= beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing shrinking with GPU scalar..." << std::endl; ublas_v1 /= beta; vcl_v1 /= gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // add and inplace_add of vectors // for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); //resync viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing add on vector..." << std::endl; std::cout << "Checking for successful copy..." << std::endl; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_v2, vcl_v2) != EXIT_SUCCESS) return EXIT_FAILURE; ublas_v1 = ublas_v1 + ublas_v2; vcl_v1 = vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace-add on vector..." 
<< std::endl; ublas_v1 += ublas_v2; vcl_v1 += vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // multiply-add // std::cout << "Testing multiply-add on vector with CPU scalar (right)..." << std::endl; for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + alpha * ublas_v2; vcl_v1 = vcl_v1 + alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with CPU scalar (left)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 + ublas_v2; vcl_v1 = alpha * vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with CPU scalar (both)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 + beta * ublas_v2; vcl_v1 = alpha * vcl_v1 + beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with CPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v2; vcl_v1 += alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (right)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + alpha * ublas_v2; vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (left)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + alpha * ublas_v2; vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-add on vector with GPU scalar (both)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = alpha * ublas_v1 + beta * ublas_v2; vcl_v1 = gpu_alpha * vcl_v1 + gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v1 + beta * ublas_v2; vcl_v1 += gpu_alpha * vcl_v1 + gpu_beta * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace multiply-add on vector with GPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += alpha * ublas_v2; vcl_v1 += gpu_alpha * vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // division-add // std::cout << "Testing division-add on vector with CPU scalar (right)..." 
<< std::endl; for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + ublas_v2 / alpha; vcl_v1 = vcl_v1 + vcl_v2 / alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with CPU scalar (left)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha + ublas_v2; vcl_v1 = vcl_v1 / alpha + vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with CPU scalar (both)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha + ublas_v2 / beta; vcl_v1 = vcl_v1 / alpha + vcl_v2 / beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-multiply-add on vector with CPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha + ublas_v2 * beta; vcl_v1 = vcl_v1 / alpha + vcl_v2 * beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing multiply-division-add on vector with CPU scalar..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 * alpha + ublas_v2 / beta; vcl_v1 = vcl_v1 * alpha + vcl_v2 / beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with CPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v2 / alpha; vcl_v1 += vcl_v2 / alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with GPU scalar (right)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + ublas_v2 / alpha; vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with GPU scalar (left)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 + ublas_v2 / alpha; vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing division-add on vector with GPU scalar (both)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 = ublas_v1 / alpha + ublas_v2 / beta; vcl_v1 = vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with GPU scalar (both, adding)..." 
<< std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v1 / alpha + ublas_v2 / beta; vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-multiply-add on vector with GPU scalar (adding)..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v1 / alpha + ublas_v2 * beta; vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing inplace division-add on vector with GPU scalar..." << std::endl; ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); ublas_v1 += ublas_v2 * alpha; vcl_v1 += vcl_v2 * gpu_alpha; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // // More complicated expressions (for ensuring the operator overloads work correctly) // for (size_t i=0; i < ublas_v1.size(); ++i) ublas_v1(i) = NumericT(i); ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing three vector additions..." << std::endl; ublas_v1 = ublas_v2 + ublas_v1 + ublas_v2; vcl_v1 = vcl_v2 + vcl_v1 + vcl_v2; if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; // -------------------------------------------------------------------------- ublas_v2 = 3 * ublas_v1; viennacl::copy(ublas_v1.begin(), ublas_v1.end(), vcl_v1.begin()); viennacl::copy(ublas_v2.begin(), ublas_v2.end(), vcl_v2.begin()); std::cout << "Testing swap..." 
<< std::endl; swap(ublas_v1, ublas_v2); swap(vcl_v1, vcl_v2); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing elementwise multiplication..." << std::endl; std::cout << " v1 = element_prod(v1, v2);" << std::endl; ublas_v1 = ublas::element_prod(ublas_v1, ublas_v2); vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " v1 += element_prod(v1, v2);" << std::endl; ublas_v1 += ublas::element_prod(ublas_v1, ublas_v2); vcl_v1 += viennacl::linalg::element_prod(vcl_v1, vcl_v2); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " v1 = element_prod(v1 + v2, v2);" << std::endl; ublas_v1 = ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2); vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " v1 += element_prod(v1 + v2, v2);" << std::endl; ublas_v1 += ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2); vcl_v1 += viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " v1 = element_prod(v1, v2 + v1);" << std::endl; ublas_v1 = ublas::element_prod(ublas_v1, ublas_v2 + ublas_v1); vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " v1 += element_prod(v1, v2 + v1);" << std::endl; ublas_v1 += ublas::element_prod(ublas_v1, ublas_v2 + ublas_v1); vcl_v1 += viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " v1 = element_prod(v1 + v2, v2 + v1);" << std::endl; ublas_v1 = ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2 + ublas_v1); vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return 
EXIT_FAILURE; std::cout << " v1 += element_prod(v1 + v2, v2 + v1);" << std::endl; ublas_v1 += ublas::element_prod(ublas_v1 + ublas_v2, ublas_v2 + ublas_v1); vcl_v1 += viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1); if (check(ublas_v1, vcl_v1) != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << "Testing elementwise division..." << std::endl; for (std::size_t i=0; i int test() { int retval = EXIT_SUCCESS; std::size_t size = 12345; std::cout << "Running tests for vector of size " << size << std::endl; // // Set up UBLAS objects // ublas::vector ublas_full_vec(size); ublas::vector ublas_full_vec2(ublas_full_vec.size()); for (std::size_t i=0; i > ublas_range_vec(ublas_full_vec, r1); ublas::vector_range< ublas::vector > ublas_range_vec2(ublas_full_vec2, r2); ublas::slice s1( ublas_full_vec.size() / 4, 3, ublas_full_vec.size() / 4); ublas::slice s2(2 * ublas_full_vec2.size() / 4, 2, ublas_full_vec2.size() / 4); ublas::vector_slice< ublas::vector > ublas_slice_vec(ublas_full_vec, s1); ublas::vector_slice< ublas::vector > ublas_slice_vec2(ublas_full_vec2, s2); // // Set up ViennaCL objects // viennacl::vector vcl_full_vec(ublas_full_vec.size()); viennacl::vector vcl_full_vec2(ublas_full_vec2.size()); viennacl::fast_copy(ublas_full_vec.begin(), ublas_full_vec.end(), vcl_full_vec.begin()); viennacl::copy(ublas_full_vec2.begin(), ublas_full_vec2.end(), vcl_full_vec2.begin()); viennacl::range vcl_r1( vcl_full_vec.size() / 4, 2 * vcl_full_vec.size() / 4); viennacl::range vcl_r2(2 * vcl_full_vec2.size() / 4, 3 * vcl_full_vec2.size() / 4); viennacl::vector_range< viennacl::vector > vcl_range_vec(vcl_full_vec, vcl_r1); viennacl::vector_range< viennacl::vector > vcl_range_vec2(vcl_full_vec2, vcl_r2); { viennacl::vector vcl_short_vec(vcl_range_vec); viennacl::vector vcl_short_vec2 = vcl_range_vec2; ublas::vector ublas_short_vec(ublas_range_vec); ublas::vector ublas_short_vec2(ublas_range_vec2); std::cout << "Testing creation of vectors from range..." 
<< std::endl; if (check(ublas_short_vec, vcl_short_vec) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_short_vec2, vcl_short_vec2) != EXIT_SUCCESS) return EXIT_FAILURE; } viennacl::slice vcl_s1( vcl_full_vec.size() / 4, 3, vcl_full_vec.size() / 4); viennacl::slice vcl_s2(2 * vcl_full_vec2.size() / 4, 2, vcl_full_vec2.size() / 4); viennacl::vector_slice< viennacl::vector > vcl_slice_vec(vcl_full_vec, vcl_s1); viennacl::vector_slice< viennacl::vector > vcl_slice_vec2(vcl_full_vec2, vcl_s2); viennacl::vector vcl_short_vec(vcl_slice_vec); viennacl::vector vcl_short_vec2 = vcl_slice_vec2; ublas::vector ublas_short_vec(ublas_slice_vec); ublas::vector ublas_short_vec2(ublas_slice_vec2); std::cout << "Testing creation of vectors from slice..." << std::endl; if (check(ublas_short_vec, vcl_short_vec) != EXIT_SUCCESS) return EXIT_FAILURE; if (check(ublas_short_vec2, vcl_short_vec2) != EXIT_SUCCESS) return EXIT_FAILURE; // // Now start running tests for vectors, ranges and slices: // std::cout << " ** vcl_v1 = vector, vcl_v2 = vector **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_short_vec, vcl_short_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = vector, vcl_v2 = range **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_short_vec, vcl_range_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = vector, vcl_v2 = slice **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_short_vec, vcl_slice_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " ** vcl_v1 = range, vcl_v2 = vector **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_range_vec, vcl_short_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = range, vcl_v2 = range **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_range_vec, vcl_range_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; 
std::cout << " ** vcl_v1 = range, vcl_v2 = slice **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_range_vec, vcl_slice_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; /////// std::cout << " ** vcl_v1 = slice, vcl_v2 = vector **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_slice_vec, vcl_short_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = slice, vcl_v2 = range **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_slice_vec, vcl_range_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; std::cout << " ** vcl_v1 = slice, vcl_v2 = slice **" << std::endl; retval = test(ublas_short_vec, ublas_short_vec2, vcl_slice_vec, vcl_slice_vec2); if (retval != EXIT_SUCCESS) return EXIT_FAILURE; return EXIT_SUCCESS; } // // ------------------------------------------------------------- // int main() { std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "## Test :: Vector with Integer types" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; int retval = EXIT_SUCCESS; std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: unsigned int" << std::endl; retval = test(); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; { std::cout << "# Testing setup:" << std::endl; std::cout << " numeric: long" << std::endl; retval = test(); if( retval == EXIT_SUCCESS ) std::cout << "# Test passed" << std::endl; else 
return retval; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; std::cout << std::endl; std::cout << "------- Test completed --------" << std::endl; std::cout << std::endl; return retval; } ViennaCL-1.5.1-src/README000644 001750 001750 00000007503 12267307463 014657 0ustar00rupprupp000000 000000 ************************************************ **** ViennaCL Readme **** ************************************************ Table of Contents ------------------ 1. Requirements 2. Installation 3. Project Layout 4. Authors and Contact A. License 1. Requirements ----------------- ViennaCL requires the following: * A recent C++ compiler (e.g. gcc 4.x.x) * For accessing GPUs and multi-core CPUs: OpenCL (shared library and include files) * For building some of the tutorials: uBLAS (shipped with the Boost libraries) 2. Installation ----------------- The first step is to extract the file: Unix-based OS: $> gunzip ViennaCL-1.5.1.tar.gz $> tar -xf ViennaCL-1.5.1.tar $> cd ViennaCL-1.5.1 Windows: Extract the file using your favorite compressor/decompressor, e.g. 7-zip. ViennaCL is a header-only library, therefore it is sufficient to copy the subfolder viennacl/ (holding the header files) into your project directory or your system include directory. For instructions on how to set the include paths correctly, please refer to the documentation of your compiler. For building the examples, proceed as follows: Unix-based clients: * change to the build directory: $> cd build * call cmake $> cmake .. * If CMake cannot find the OpenCL files, e.g. 'Could NOT find OPENCL (missing: OPENCL_INCLUDE_DIR)', please set the paths manually, e.g. $> cmake -DOPENCL_INCLUDE_DIR=../CL .. Or, use the CMake GUI. $> cmake-gui Or, disable OpenCL if you are not planning to use parallel programming components $> cmake .. 
-DENABLE_OPENCL=0 * Use 'make' to build all examples: $> make (alternatively, you can build them individually via 'make blas1', 'make viennacl-info', etc.) * Start the tutorials (optional) $> examples/blas1 $> examples/custom-kernels $> examples/viennacl-info (...) Windows: * Open the CMake GUI * Set the source code location ('Where is the source code:') to the extracted ViennaCL-1.x.x folder * Set the build folder ('Where to build the binaries:') to the subfolder build/ in the ViennaCL-1.x.x folder. * Click on 'Configure' and select your compiler * Click on 'Configure' again * Click on 'Generate' * Navigate to the build/ folder, open the generated project files with your favorite IDE, and build them. 3. Project Layout ----------------- ---- ViennaCL-1.X.X | |-- auxiliary/ - (only in src-Edition) Auxiliary files (i.e. the OpenCL source code tree and the converter for the header files) | |-- build/ - Build directory for building the examples | |-- CL/ - The OpenCL headers | |-- cmake/ - Additional CMake configuration files | |-- doc/ - Documentation (LaTeX and doxygen) | |-- examples/ - Tutorial and benchmarking applications | |-- testdata/ - Test data for the tutorials and benchmarks | |-- benchmarks/ - A small benchmarking suite | |-- tutorial/ - Some tutorials explaining the usage of ViennaCL | |-- parameters/ - Parameter optimization environment | |-- external/ - External libraries | |-- libviennacl/ - Shared library for interfacing some BLAS functionality of ViennaCL from languages other than C++ | |-- tests/ - Automated test suite using CTest | |-- viennacl/ - The library source code 4. Authors and Contact ------------------------ For any technical questions related to ViennaCL, please use our mailing list: viennacl-support@lists.sourceforge.net You may also use the forum provided by sourceforge.net: http://sourceforge.net/projects/viennacl/ For any other issues, please contact the project head Karl Rupp at rupp@iue.tuwien.ac.at. 
ViennaCL was developed under the aegis of the 'Institute for Microelectronics' at the 'Vienna University of Technology'. A. License ------------ ViennaCL is distributed under the MIT (X11) License. See file LICENSE. ViennaCL-1.5.1-src/viennacl/000755 001750 001750 00000000000 12267307531 015565 5ustar00rupprupp000000 000000 ViennaCL-1.5.1-src/viennacl/scalar.hpp000644 001750 001750 00000070631 12267307531 017552 0ustar00rupprupp000000 000000 #ifndef VIENNACL_SCALAR_HPP_ #define VIENNACL_SCALAR_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/scalar.hpp @brief Implementation of the ViennaCL scalar class */ #include #include "viennacl/forwards.h" #include "viennacl/backend/memory.hpp" #include "viennacl/meta/result_of.hpp" #include "viennacl/linalg/scalar_operations.hpp" #include "viennacl/traits/handle.hpp" #ifdef VIENNACL_WITH_OPENCL #include "viennacl/ocl/backend.hpp" #endif namespace viennacl { /** @brief A proxy for scalar expressions (e.g. 
from inner vector products) * * assumption: dim(LHS) >= dim(RHS), where dim(scalar) = 0, dim(vector) = 1 and dim(matrix = 2) * @tparam LHS The left hand side operand * @tparam RHS The right hand side operand * @tparam OP The operation tag */ template class scalar_expression { typedef typename LHS::value_type DummyType; //Visual C++ 2005 does not allow to write LHS::value_type::value_type public: typedef typename viennacl::result_of::cpu_value_type::type ScalarType; scalar_expression(LHS & lhs, RHS & rhs) : lhs_(lhs), rhs_(rhs) {} /** @brief Returns the left hand side operand */ LHS & lhs() const { return lhs_; } /** @brief Returns the right hand side operand */ RHS & rhs() const { return rhs_; } /** @brief Conversion operator to the host scalar type (ScalarType). Assigns this expression to a temporary viennacl::scalar and returns that temporary converted to ScalarType. */ operator ScalarType () const { viennacl::scalar temp; temp = *this; return temp; } private: LHS & lhs_; RHS & rhs_; }; /** @brief Specialization of a scalar expression for inner products. Allows for a final reduction on the CPU * * assumption: dim(LHS) >= dim(RHS), where dim(scalar) = 0, dim(vector) = 1 and dim(matrix) = 2 * @tparam LHS The left hand side operand * @tparam RHS The right hand side operand * @tparam OP The operation tag */ template class scalar_expression { //typedef typename LHS::value_type DummyType; //Visual C++ 2005 does not allow to write LHS::value_type::value_type public: typedef typename viennacl::result_of::cpu_value_type::type ScalarType; scalar_expression(LHS & lhs, RHS & rhs) : lhs_(lhs), rhs_(rhs) {} /** @brief Returns the left hand side operand */ LHS & lhs() const { return lhs_; } /** @brief Returns the right hand side operand */ RHS & rhs() const { return rhs_; } /** @brief Conversion operator to the host scalar type (ScalarType). Calls viennacl::linalg::inner_prod_cpu() on both operands and returns the value stored in 'result'. */ operator ScalarType () const { ScalarType result; viennacl::linalg::inner_prod_cpu(lhs_, rhs_, result); return result; } private: LHS & lhs_; RHS & rhs_; }; /** @brief Specialization of a scalar expression for norm_1. 
Allows for a final reduction on the CPU * * @tparam LHS The left hand side operand * @tparam RHS The right hand side operand */ template class scalar_expression { //typedef typename LHS::value_type DummyType; //Visual C++ 2005 does not allow to write LHS::value_type::value_type public: typedef typename viennacl::result_of::cpu_value_type::type ScalarType; scalar_expression(LHS & lhs, RHS & rhs) : lhs_(lhs), rhs_(rhs) {} /** @brief Returns the left hand side operand */ LHS & lhs() const { return lhs_; } /** @brief Returns the right hand side operand (stored, but not read by the conversion operator below) */ RHS & rhs() const { return rhs_; } /** @brief Conversion operator to the host scalar type (ScalarType). Calls viennacl::linalg::norm_1_cpu() on the left hand side operand and returns the value stored in 'result'. */ operator ScalarType () const { ScalarType result; viennacl::linalg::norm_1_cpu(lhs_, result); return result; } private: LHS & lhs_; RHS & rhs_; }; /** @brief Specialization of a scalar expression for norm_2. Allows for a final reduction on the CPU * * @tparam LHS The left hand side operand * @tparam RHS The right hand side operand */ template class scalar_expression { //typedef typename LHS::value_type DummyType; //Visual C++ 2005 does not allow to write LHS::value_type::value_type public: typedef typename viennacl::result_of::cpu_value_type::type ScalarType; scalar_expression(LHS & lhs, RHS & rhs) : lhs_(lhs), rhs_(rhs) {} /** @brief Returns the left hand side operand */ LHS & lhs() const { return lhs_; } /** @brief Returns the right hand side operand (stored, but not read by the conversion operator below) */ RHS & rhs() const { return rhs_; } /** @brief Conversion operator to the host scalar type (ScalarType). Calls viennacl::linalg::norm_2_cpu() on the left hand side operand and returns the value stored in 'result'. */ operator ScalarType () const { ScalarType result; viennacl::linalg::norm_2_cpu(lhs_, result); return result; } private: LHS & lhs_; RHS & rhs_; }; /** @brief Specialization of a scalar expression for norm_inf. 
Allows for a final reduction on the CPU * * @tparam LHS The left hand side operand * @tparam RHS The right hand side operand */ template class scalar_expression { //typedef typename LHS::value_type DummyType; //Visual C++ 2005 does not allow to write LHS::value_type::value_type public: typedef typename viennacl::result_of::cpu_value_type::type ScalarType; scalar_expression(LHS & lhs, RHS & rhs) : lhs_(lhs), rhs_(rhs) {} /** @brief Returns the left hand side operand */ LHS & lhs() const { return lhs_; } /** @brief Returns the right hand side operand (stored, but not read by the conversion operator below) */ RHS & rhs() const { return rhs_; } /** @brief Conversion operator to the host scalar type (ScalarType). Calls viennacl::linalg::norm_inf_cpu() on the left hand side operand and returns the value stored in 'result'. */ operator ScalarType () const { ScalarType result; viennacl::linalg::norm_inf_cpu(lhs_, result); return result; } private: LHS & lhs_; RHS & rhs_; }; /** @brief Specialization of a scalar expression for norm_frobenius. Allows for a final reduction on the CPU * * @tparam LHS The left hand side operand * @tparam RHS The right hand side operand */ template class scalar_expression { //typedef typename LHS::value_type DummyType; //Visual C++ 2005 does not allow to write LHS::value_type::value_type public: typedef typename viennacl::result_of::cpu_value_type::type ScalarType; scalar_expression(LHS & lhs, RHS & rhs) : lhs_(lhs), rhs_(rhs) {} /** @brief Returns the left hand side operand */ LHS & lhs() const { return lhs_; } /** @brief Returns the right hand side operand (stored, but not read by the conversion operator below) */ RHS & rhs() const { return rhs_; } /** @brief Conversion operator to the host scalar type (ScalarType). Calls viennacl::linalg::norm_frobenius_cpu() on the left hand side operand and returns the value stored in 'result'. */ operator ScalarType () const { ScalarType result; viennacl::linalg::norm_frobenius_cpu(lhs_, result); return result; } private: LHS & lhs_; RHS & rhs_; }; /** @brief This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type like float or double. * * Since every read and write operation requires a CPU->GPU or GPU->CPU transfer, this type should be used with care. 
* The advantage of this type is that the GPU command queue can be filled without blocking read operations. * * @tparam SCALARTYPE Either float or double. Checked at compile time. */ template class scalar { typedef scalar self_type; public: typedef viennacl::backend::mem_handle handle_type; typedef vcl_size_t size_type; /** @brief Returns the underlying host scalar type. */ typedef SCALARTYPE value_type; /** @brief Creates the scalar object, but does not yet allocate memory. Thus, scalar<> can also be a global variable (if really necessary). */ scalar() {} /** @brief Allocates the memory for the scalar and sets it to the supplied value. */ scalar(SCALARTYPE val, viennacl::context ctx = viennacl::context()) { viennacl::backend::memory_create(val_, sizeof(SCALARTYPE), ctx, &val); } #ifdef VIENNACL_WITH_OPENCL /** @brief Wraps an existing memory entry into a scalar * * @param mem The OpenCL memory handle * @param size Ignored - Only necessary to avoid ambiguities. Users are advised to set this parameter to '1'. */ explicit scalar(cl_mem mem, size_type /*size*/) { val_.switch_active_handle_id(viennacl::OPENCL_MEMORY); val_.opencl_handle() = mem; val_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. } #endif /** @brief Allocates memory for the scalar and sets it to the result of supplied expression. */ template scalar(scalar_expression const & proxy) { val_.switch_active_handle_id(viennacl::traits::handle(proxy.lhs()).get_active_handle_id()); viennacl::backend::memory_create(val_, sizeof(SCALARTYPE), viennacl::traits::context(proxy)); *this = proxy; } //copy constructor /** @brief Copy constructor. 
Allocates new memory for the scalar and copies the value of the supplied scalar */ scalar(const scalar & other) { if (other.handle().get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED) { //copy value: val_.switch_active_handle_id(other.handle().get_active_handle_id()); viennacl::backend::memory_create(val_, sizeof(SCALARTYPE), viennacl::traits::context(other)); viennacl::backend::memory_copy(other.handle(), val_, 0, 0, sizeof(SCALARTYPE)); } } /** @brief Reads the value of the scalar from the GPU and returns the float or double value. */ operator SCALARTYPE() const { // make sure the scalar contains reasonable data: assert( val_.get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized, cannot read!")); SCALARTYPE tmp; viennacl::backend::memory_read(val_, 0, sizeof(SCALARTYPE), &tmp); return tmp; } /** @brief Assigns a vector entry. */ self_type & operator= (entry_proxy const & other) { init_if_necessary(viennacl::traits::context(other)); viennacl::backend::memory_copy(other.handle(), val_, other.index() * sizeof(SCALARTYPE), 0, sizeof(SCALARTYPE)); return *this; } /** @brief Assigns the value from another scalar. 
*/ self_type & operator= (scalar const & other) { init_if_necessary(viennacl::traits::context(other)); viennacl::backend::memory_copy(other.handle(), val_, 0, 0, sizeof(SCALARTYPE)); return *this; } self_type & operator= (float cpu_other) { init_if_necessary(viennacl::context()); //copy value: SCALARTYPE value = static_cast(cpu_other); viennacl::backend::memory_write(val_, 0, sizeof(SCALARTYPE), &value); return *this; } self_type & operator= (double cpu_other) { init_if_necessary(viennacl::context()); SCALARTYPE value = static_cast(cpu_other); viennacl::backend::memory_write(val_, 0, sizeof(SCALARTYPE), &value); return *this; } self_type & operator= (long cpu_other) { init_if_necessary(viennacl::context()); SCALARTYPE value = static_cast(cpu_other); viennacl::backend::memory_write(val_, 0, sizeof(SCALARTYPE), &value); return *this; } self_type & operator= (unsigned long cpu_other) { init_if_necessary(viennacl::context()); SCALARTYPE value = static_cast(cpu_other); viennacl::backend::memory_write(val_, 0, sizeof(SCALARTYPE), &value); return *this; } self_type & operator= (int cpu_other) { init_if_necessary(viennacl::context()); SCALARTYPE value = static_cast(cpu_other); viennacl::backend::memory_write(val_, 0, sizeof(SCALARTYPE), &value); return *this; } self_type & operator= (unsigned int cpu_other) { init_if_necessary(viennacl::context()); SCALARTYPE value = static_cast(cpu_other); viennacl::backend::memory_write(val_, 0, sizeof(SCALARTYPE), &value); return *this; } /** @brief Sets the scalar to the result of supplied inner product expression. */ template self_type & operator= (scalar_expression const & proxy) { init_if_necessary(viennacl::traits::context(proxy)); viennacl::linalg::inner_prod_impl(proxy.lhs(), proxy.rhs(), *this); return *this; } /** @brief Sets the scalar to the result of supplied norm_1 expression. 
*/ template self_type & operator= (scalar_expression const & proxy) { init_if_necessary(viennacl::traits::context(proxy)); viennacl::linalg::norm_1_impl(proxy.lhs(), *this); return *this; } /** @brief Sets the scalar to the result of supplied norm_2 expression. */ template self_type & operator= (scalar_expression const & proxy) { init_if_necessary(viennacl::traits::context(proxy)); viennacl::linalg::norm_2_impl(proxy.lhs(), *this); return *this; } /** @brief Sets the scalar to the result of supplied norm_inf expression. */ template self_type & operator= (scalar_expression const & proxy) { init_if_necessary(viennacl::traits::context(proxy)); viennacl::linalg::norm_inf_impl(proxy.lhs(), *this); return *this; } /** @brief Sets the scalar to the result of supplied norm_frobenius expression. */ template self_type & operator= (scalar_expression const & proxy) { init_if_necessary(viennacl::traits::context(proxy)); viennacl::linalg::norm_frobenius_impl(proxy.lhs(), *this); return *this; } /** @brief Sets the scalar to the inverse with respect to addition of the supplied sub-expression */ template self_type & operator= (scalar_expression const & proxy) { init_if_necessary(viennacl::traits::context(proxy)); viennacl::linalg::as(*this, proxy.lhs(), SCALARTYPE(-1.0), 1, false, true); return *this; } /** @brief Inplace addition of a ViennaCL scalar */ self_type & operator += (scalar const & other) { assert( val_.get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized!")); viennacl::linalg::asbs(*this, // s1 = *this, SCALARTYPE(1.0), 1, false, false, // s1 * 1.0 other, SCALARTYPE(1.0), 1, false, false); // + s2 * 1.0 return *this; } /** @brief Inplace addition of a host scalar (float or double) */ self_type & operator += (SCALARTYPE other) { assert( val_.get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized!")); viennacl::linalg::asbs(*this, // s1 = *this, SCALARTYPE(1.0), 1, false, false, // s1 * 1.0 other, 
SCALARTYPE(1.0), 1, false, false); // + s2 * 1.0 return *this; } /** @brief Inplace subtraction of a ViennaCL scalar */ self_type & operator -= (scalar const & other) { assert( val_.get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized!")); viennacl::linalg::asbs(*this, // s1 = *this, SCALARTYPE(1.0), 1, false, false, // s1 * 1.0 other, SCALARTYPE(-1.0), 1, false, false); // + s2 * (-1.0) return *this; } /** @brief Inplace subtraction of a host scalar (float or double) */ self_type & operator -= (SCALARTYPE other) { assert( val_.get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized!")); viennacl::linalg::asbs(*this, // s1 = *this, SCALARTYPE(1.0), 1, false, false, // s1 * 1.0 other, SCALARTYPE(-1.0), 1, false, false); // + s2 * (-1.0) return *this; } /** @brief Inplace multiplication with a ViennaCL scalar */ self_type & operator *= (scalar const & other) { assert( val_.get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized!")); viennacl::linalg::as(*this, // s1 = *this, other, 1, false, false); // s1 * s2 return *this; } /** @brief Inplace multiplication with a host scalar (float or double) */ self_type & operator *= (SCALARTYPE other) { assert( val_.get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized!")); viennacl::linalg::as(*this, // s1 = *this, other, 1, false, false); // s1 * s2 return *this; } //////////////// operator /= //////////////////////////// /** @brief Inplace division with a ViennaCL scalar */ self_type & operator /= (scalar const & other) { assert( val_.get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized!")); viennacl::linalg::as(*this, // s1 = *this, other, 1, true, false); // s1 / s2 return *this; } /** @brief Inplace division with a host scalar (float or double) */ self_type & operator /= (SCALARTYPE other) { assert( val_.get_active_handle_id() != 
viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized!")); viennacl::linalg::as(*this, // s1 = *this, other, 1, true, false); // s1 / s2 return *this; } //////////////// operator + //////////////////////////// /** @brief Addition of two ViennaCL scalars */ self_type operator + (scalar const & other) { assert( val_.get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized!")); self_type result = 0; viennacl::linalg::asbs(result, // result = *this, SCALARTYPE(1.0), 1, false, false, // *this * 1.0 other, SCALARTYPE(1.0), 1, false, false); // + other * 1.0 return result; } /** @brief Addition of a ViennaCL scalar with a scalar expression */ template self_type operator + (scalar_expression const & proxy) const { assert( val_.get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized!")); self_type result = proxy; viennacl::linalg::asbs(result, // result = *this, SCALARTYPE(1.0), 1, false, false, // *this * 1.0 result, SCALARTYPE(1.0), 1, false, false); // + result * 1.0 return result; } /** @brief Addition of a ViennaCL scalar with a host scalar (float, double) */ self_type operator + (SCALARTYPE other) { assert( val_.get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized!")); self_type result = 0; viennacl::linalg::asbs(result, // result = *this, SCALARTYPE(1.0), 1, false, false, // *this * 1.0 other, SCALARTYPE(1.0), 1, false, false); // + other * 1.0 return result; } //////////////// operator - //////////////////////////// /** @brief Sign flip of the scalar. 
Does not evaluate immediately, but instead returns an expression template object */ scalar_expression operator-() const { return scalar_expression(*this, *this); } /** @brief Subtraction of two ViennaCL scalars */ self_type operator - (scalar const & other) const { assert( val_.get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized!")); self_type result = 0; viennacl::linalg::asbs(result, // result = *this, SCALARTYPE(1.0), 1, false, false, // *this * 1.0 other, SCALARTYPE(-1.0), 1, false, false); // + other * (-1.0) return result; } /** @brief Subtraction of a ViennaCL scalar from a scalar expression */ template self_type operator - (scalar_expression const & proxy) const { assert( val_.get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized!")); self_type result = proxy; viennacl::linalg::asbs(result, // result = *this, SCALARTYPE(1.0), 1 , false, false, // *this * 1.0 result, SCALARTYPE(-1.0), 1, false, false); // + result * (-1.0) return result; } /** @brief Subtraction of a host scalar (float, double) from a ViennaCL scalar */ scalar operator - (SCALARTYPE other) const { assert( val_.get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized!")); self_type result = 0; viennacl::linalg::asbs(result, // result = *this, SCALARTYPE(1.0), 1, false, false, // *this * 1.0 other, SCALARTYPE(-1.0), 1, false, false); // + other * (-1.0) return result; } //////////////// operator * //////////////////////////// /** @brief Multiplication of two ViennaCL scalars */ self_type operator * (scalar const & other) const { assert( val_.get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized!")); scalar result = 0; viennacl::linalg::as(result, // result = *this, other, 1, false, false); // *this * other return result; } /** @brief Multiplication of a ViennaCL scalar with a scalar expression */ template self_type operator * (scalar_expression 
const & proxy) const { assert( val_.get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized!")); self_type result = proxy; viennacl::linalg::as(result, // result = *this, result, 1, false, false); // *this * proxy return result; } /** @brief Multiplication of a host scalar (float, double) with a ViennaCL scalar */ self_type operator * (SCALARTYPE other) const { assert( val_.get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized!")); scalar result = 0; viennacl::linalg::as(result, // result = *this, other, 1, false, false); // *this * other return result; } //////////////// operator / //////////////////////////// /** @brief Division of two ViennaCL scalars */ self_type operator / (scalar const & other) const { assert( val_.get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized!")); self_type result = 0; viennacl::linalg::as(result, // result = *this, other, 1, true, false); // *this / other return result; } /** @brief Division of a ViennaCL scalar by a scalar expression */ template self_type operator / (scalar_expression const & proxy) const { assert( val_.get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized!")); self_type result = proxy; viennacl::linalg::as(result, // result = *this, result, 1, true, false); // *this / proxy return result; } /** @brief Division of a ViennaCL scalar by a host scalar (float, double)*/ self_type operator / (SCALARTYPE other) const { assert( val_.get_active_handle_id() != viennacl::MEMORY_NOT_INITIALIZED && bool("Scalar not initialized!")); self_type result = 0; viennacl::linalg::as(result, // result = *this, other, 1, true, false); // *this / other return result; } /** @brief Returns the memory handle, non-const version */ handle_type & handle() { return val_; } /** @brief Returns the memory handle, const version */ const handle_type & handle() const { return val_; } private: void 
init_if_necessary(viennacl::context ctx) { if (val_.get_active_handle_id() == viennacl::MEMORY_NOT_INITIALIZED) { viennacl::backend::memory_create(val_, sizeof(SCALARTYPE), ctx); } } handle_type val_; }; //stream operators: /** @brief Allows to directly print the value of a scalar to an output stream */ template std::ostream & operator<<(std::ostream & s, const scalar & val) { SCALARTYPE temp = val; s << temp; return s; } /** @brief Allows to directly read a value of a scalar from an input stream */ template std::istream & operator>>(std::istream & s, const scalar & val) { SCALARTYPE temp; s >> temp; val = temp; return s; } } //namespace viennacl #endif ViennaCL-1.5.1-src/viennacl/ocl/000755 001750 001750 00000000000 12267307531 016342 5ustar00rupprupp000000 000000 ViennaCL-1.5.1-src/viennacl/ocl/command_queue.hpp000644 001750 001750 00000004710 12267307531 021677 0ustar00rupprupp000000 000000 #ifndef VIENNACL_OCL_COMMAND_QUEUE_HPP_ #define VIENNACL_OCL_COMMAND_QUEUE_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/ocl/command_queue.hpp @brief Implementations of command queue representations */ #ifdef __APPLE__ #include #else #include #endif #include #include #include #include "viennacl/ocl/device.hpp" #include "viennacl/ocl/handle.hpp" namespace viennacl { namespace ocl { /** @brief A class representing a command queue * */ class command_queue { public: command_queue() {} command_queue(viennacl::ocl::handle h) : handle_(h) {} //Copy constructor: command_queue(command_queue const & other) { handle_ = other.handle_; } //assignment operator: command_queue & operator=(command_queue const & other) { handle_ = other.handle_; return *this; } bool operator==(command_queue const & other) const { return handle_ == other.handle_; } /** @brief Waits until all kernels in the queue have finished their execution */ void finish() const { clFinish(handle_.get()); } /** @brief Waits until all kernels in the queue have started their execution */ void flush() const { clFlush(handle_.get()); } viennacl::ocl::handle const & handle() const { return handle_; } viennacl::ocl::handle & handle() { return handle_; } private: viennacl::ocl::handle handle_; }; } //namespace ocl } //namespace viennacl #endif ViennaCL-1.5.1-src/viennacl/ocl/forwards.h000644 001750 001750 00000003600 12267307531 020341 0ustar00rupprupp000000 000000 #ifndef VIENNACL_OCL_FORWARDS_H_ #define VIENNACL_OCL_FORWARDS_H_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/ocl/forwards.h @brief This file provides the forward declarations for the OpenCL layer of ViennaCL */ #define VIENNACL_OCL_MAX_DEVICE_NUM 8 #include namespace viennacl { namespace ocl { //device type tags (cf. OpenCL standard) /** @brief A tag identifying OpenCL devices as GPUs. */ struct gpu_tag {}; /** @brief A tag identifying OpenCL devices as CPUs. */ struct cpu_tag {}; /** @brief A tag identifying OpenCL devices as accelerators (e.g. Intel Xeon Phi) */ struct accelerator_tag {}; /** @brief A tag denoting the default OpenCL device type (SDK-specific) */ struct default_tag {}; class kernel; class device; class command_queue; class context; class program; template class handle; template void enqueue(KernelType & k, viennacl::ocl::command_queue const & queue); inline viennacl::ocl::context & current_context(); inline viennacl::ocl::device const & current_device(); } } //namespace viennacl #endif /*@}*/ ViennaCL-1.5.1-src/viennacl/ocl/handle.hpp000644 001750 001750 00000015343 12267307531 020314 0ustar00rupprupp000000 000000 #ifndef VIENNACL_OCL_HANDLE_HPP_ #define VIENNACL_OCL_HANDLE_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/ocl/handle.hpp @brief Implementation of a smart-pointer-like class for handling OpenCL handles. */ #ifdef __APPLE__ #include #else #include #endif #include #include #include #include "viennacl/ocl/forwards.h" #include "viennacl/ocl/error.hpp" namespace viennacl { namespace ocl { /** @brief Helper for OpenCL reference counting used by class handle. * @tparam OCL_TYPE Must be one out of cl_mem, cl_program, cl_kernel, cl_command_queue and cl_context, otherwise a compile time error is thrown. */ template class handle_inc_dec_helper { typedef typename OCL_TYPE::ERROR_TEMPLATE_ARGUMENT_FOR_CLASS_INVALID ErrorType; }; /** \cond */ //cl_mem: template <> struct handle_inc_dec_helper { static void inc(cl_mem & something) { cl_int err = clRetainMemObject(something); VIENNACL_ERR_CHECK(err); } static void dec(cl_mem & something) { #ifndef __APPLE__ cl_int err = clReleaseMemObject(something); VIENNACL_ERR_CHECK(err); #endif } }; //cl_program: template <> struct handle_inc_dec_helper { static void inc(cl_program & something) { cl_int err = clRetainProgram(something); VIENNACL_ERR_CHECK(err); } static void dec(cl_program & something) { #ifndef __APPLE__ cl_int err = clReleaseProgram(something); VIENNACL_ERR_CHECK(err); #endif } }; //cl_kernel: template <> struct handle_inc_dec_helper { static void inc(cl_kernel & something) { cl_int err = clRetainKernel(something); VIENNACL_ERR_CHECK(err); } static void dec(cl_kernel & something) { #ifndef __APPLE__ cl_int err = clReleaseKernel(something); VIENNACL_ERR_CHECK(err); #endif } }; //cl_command_queue: template <> struct handle_inc_dec_helper { static void inc(cl_command_queue & something) { 
cl_int err = clRetainCommandQueue(something); VIENNACL_ERR_CHECK(err); } static void dec(cl_command_queue & something) { #ifndef __APPLE__ cl_int err = clReleaseCommandQueue(something); VIENNACL_ERR_CHECK(err); #endif } }; //cl_context: template <> struct handle_inc_dec_helper { static void inc(cl_context & something) { cl_int err = clRetainContext(something); VIENNACL_ERR_CHECK(err); } static void dec(cl_context & something) { #ifndef __APPLE__ cl_int err = clReleaseContext(something); VIENNACL_ERR_CHECK(err); #endif } }; /** \endcond */ /** @brief Handle class the effectively represents a smart pointer for OpenCL handles */ template class handle { public: handle() : h_(0), p_context_(NULL) {} handle(const OCL_TYPE & something, viennacl::ocl::context const & c) : h_(something), p_context_(&c) {} handle(const handle & other) : h_(other.h_), p_context_(other.p_context_) { if (h_ != 0) inc(); } ~handle() { if (h_ != 0) dec(); } /** @brief Copies the OpenCL handle from the provided handle. Does not take ownership like e.g. std::auto_ptr<>, so both handle objects are valid (more like shared_ptr). */ handle & operator=(const handle & other) { if (h_ != 0) dec(); h_ = other.h_; p_context_ = other.p_context_; inc(); return *this; } /** @brief Wraps an OpenCL handle. Does not change the context of this handle object! Decreases the reference count if the handle object is destroyed or another OpenCL handle is assigned. */ handle & operator=(const OCL_TYPE & something) { if (h_ != 0) dec(); h_ = something; return *this; } /** @brief Wraps an OpenCL handle including its associated context. Decreases the reference count if the handle object is destroyed or another OpenCL handle is assigned. */ handle & operator=(std::pair p) { if (h_ != 0) dec(); h_ = p.first; p_context_ = p.second; return *this; } /** @brief Implicit conversion to the plain OpenCL handle. DEPRECATED and will be removed some time in the future. 
*/ operator OCL_TYPE() const { return h_; } const OCL_TYPE & get() const { return h_; } viennacl::ocl::context const & context() const { assert(p_context_ != NULL && bool("Logic error: Accessing dangling context from handle.")); return *p_context_; } void context(viennacl::ocl::context const & c) { p_context_ = &c; } /** @brief Swaps the OpenCL handle of two handle objects */ handle & swap(handle & other) { OCL_TYPE tmp = other.h_; other.h_ = this->h_; this->h_ = tmp; viennacl::ocl::context const * tmp2 = other.p_context_; other.p_context_ = this->p_context_; this->p_context_ = tmp2; return *this; } /** @brief Manually increment the OpenCL reference count. Typically called automatically, but is necessary if user-supplied memory objects are wrapped. */ void inc() { handle_inc_dec_helper::inc(h_); } /** @brief Manually decrement the OpenCL reference count. Typically called automatically, but might be useful with user-supplied memory objects. */ void dec() { handle_inc_dec_helper::dec(h_); } private: OCL_TYPE h_; viennacl::ocl::context const * p_context_; }; } //namespace ocl } //namespace viennacl #endif ViennaCL-1.5.1-src/viennacl/ocl/local_mem.hpp000644 001750 001750 00000002675 12267307531 021015 0ustar00rupprupp000000 000000 #ifndef VIENNACL_OCL_LOCAL_MEM_HPP_ #define VIENNACL_OCL_LOCAL_MEM_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/ocl/local_mem.hpp @brief A local (shared) memory object for OpenCL */ #include "viennacl/forwards.h" namespace viennacl { namespace ocl { /** @brief A class representing local (shared) OpenCL memory. Typically used as kernel argument */ class local_mem { public: local_mem(vcl_size_t s) : size_(s) {} /** @brief Returns size in bytes */ vcl_size_t size() const { return size_; } /** @brief Sets the size of the local memory in bytes */ void size(vcl_size_t s) { size_ = s; } private: vcl_size_t size_; }; } } #endif ViennaCL-1.5.1-src/viennacl/ocl/infos.hpp000644 001750 001750 00000031317 12267307531 020176 0ustar00rupprupp000000 000000 #ifndef VIENNACL_OCL_INFOS_HPP_ #define VIENNACL_OCL_INFOS_HPP_ /* ========================================================================= Copyright (c) 2010-2012, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/ocl/infos.hpp @brief Implementation of convenience functions to get infos */ #ifdef __APPLE__ #include #else #include #endif #include #include "viennacl/ocl/forwards.h" #include "viennacl/ocl/error.hpp" namespace viennacl{ namespace ocl{ /** @brief Implementation details for the OpenCL managment layer in ViennaCL */ namespace detail{ /** @brief Helper class for obtaining informations from the OpenCL backend. Deprecated! */ template struct info; /** \cond */ template<> struct info{ typedef cl_mem_info type; static void get(cl_mem mem, cl_mem_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){ cl_int err = clGetMemObjectInfo(mem,param_name,param_value_size,param_value,param_value_size_ret); VIENNACL_ERR_CHECK(err); } }; template<> struct info{ typedef cl_device_info type; static void get(cl_device_id device, cl_device_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){ cl_int err = clGetDeviceInfo(device,param_name,param_value_size,param_value,param_value_size_ret); VIENNACL_ERR_CHECK(err); } }; template<> struct info{ typedef cl_kernel_info type; static void get(cl_kernel kernel, cl_kernel_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){ cl_int err = clGetKernelInfo(kernel,param_name,param_value_size,param_value,param_value_size_ret); VIENNACL_ERR_CHECK(err); } static void get(cl_kernel kernel, cl_device_id dev_id, cl_kernel_work_group_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){ cl_int err = clGetKernelWorkGroupInfo(kernel, dev_id, 
param_name,param_value_size,param_value,param_value_size_ret); VIENNACL_ERR_CHECK(err); } }; template<> struct info{ typedef cl_context_info type; static void get(cl_context context, cl_context_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){ cl_int err = clGetContextInfo(context,param_name,param_value_size,param_value,param_value_size_ret); VIENNACL_ERR_CHECK(err); } }; template<> struct info{ typedef cl_program_info type; static void get(cl_program context, cl_program_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){ cl_int err = clGetProgramInfo(context,param_name,param_value_size,param_value,param_value_size_ret); VIENNACL_ERR_CHECK(err); } }; template struct get_info_impl{ template RES_T operator()(MEM_T const & mem, INFO_T const & info){ RES_T res; detail::info::get(mem,info,sizeof(RES_T),&res,NULL); return res; } template RES_T operator()(MEM_T const & mem, MEM2_T const & mem2, INFO_T const & info){ RES_T res; detail::info::get(mem,mem2, info,sizeof(RES_T),&res,NULL); return res; } }; template<> struct get_info_impl{ template std::string operator()(const MEM_T &mem, const INFO_T &info){ char buff[1024]; detail::info::get(mem,info,1024,buff,NULL); return std::string(buff); } }; template struct get_info_impl >{ template std::vector operator()(const MEM_T &mem, const INFO_T &info){ size_t vec_size; detail::info::get(mem,info,0,NULL,&vec_size); std::vector res(vec_size/sizeof(T)); detail::info::get(mem,info,vec_size,res.data(),NULL); return res; } }; template::type param> struct return_type; /** \endcond */ /** \cond */ #define SET_INFO_RETURN_TYPE(DATA_TYPE,NAME,RETURN_TYPE) template<> struct return_type { typedef RETURN_TYPE Result; } SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_TYPE, cl_mem_object_type); SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_FLAGS, cl_mem_flags); SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_SIZE, size_t); SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_HOST_PTR, void*); 
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_MAP_COUNT, cl_uint); SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_REFERENCE_COUNT, cl_uint); SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_CONTEXT, cl_context); SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_REFERENCE_COUNT,cl_uint); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_ADDRESS_BITS, cl_uint); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_AVAILABLE, cl_bool); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_COMPILER_AVAILABLE, cl_bool); // SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_ENDIAN_LITTLE, cl_bool); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_EXTENSIONS, std::string); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong); // SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE_SUPPORT, cl_bool); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE2D_MAX_HEIGHT , size_t); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH , size_t); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE3D_MAX_DEPTH , size_t); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE3D_MAX_HEIGHT , size_t); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE3D_MAX_WIDTH , size_t); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY , cl_uint); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_COMPUTE_UNITS , cl_uint); //The minimum value is 1 SET_INFO_RETURN_TYPE(cl_device_id, 
CL_DEVICE_MAX_CONSTANT_ARGS , cl_uint); //The minimum value is 8 SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE , cl_ulong); //The minimum value is 64 KB SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE , cl_ulong); //The minimum value is max (1/4th of CL_DEVICE_GLOBAL_MEM_SIZE, 128*1024*1024) SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_PARAMETER_SIZE , size_t); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_READ_IMAGE_ARGS , cl_uint); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_SAMPLERS , cl_uint); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE , size_t); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS , cl_uint); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES , std::vector); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_WRITE_IMAGE_ARGS , cl_uint); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MEM_BASE_ADDR_ALIGN , cl_uint); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE , cl_uint); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_NAME , std::string); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PLATFORM , cl_platform_id); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR , cl_uint); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT , cl_uint); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT , cl_uint); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT , cl_uint); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE , cl_uint); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PROFILE , std::string); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PROFILING_TIMER_RESOLUTION , size_t); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_QUEUE_PROPERTIES , cl_command_queue_properties); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_SINGLE_FP_CONFIG , cl_device_fp_config); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_TYPE , cl_device_type); 
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_VENDOR , std::string); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_VENDOR_ID , cl_uint); SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_VERSION , std::string); SET_INFO_RETURN_TYPE(cl_device_id, CL_DRIVER_VERSION , std::string); SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_FUNCTION_NAME, std::string); SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_NUM_ARGS, cl_uint); SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_REFERENCE_COUNT, cl_uint); SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_CONTEXT, cl_context); SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_PROGRAM, cl_program); SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_WORK_GROUP_SIZE, size_t); // SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_COMPILE_WORK_GROUP_SIZE, size_t[3]); SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong); SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, size_t); SET_INFO_RETURN_TYPE(cl_context, CL_CONTEXT_NUM_DEVICES, cl_uint); SET_INFO_RETURN_TYPE(cl_context, CL_CONTEXT_REFERENCE_COUNT, cl_uint); SET_INFO_RETURN_TYPE(cl_context, CL_CONTEXT_PROPERTIES, cl_context_properties); #undef SET_INFO_RETURN_TYPE /** \endcond */ } template typename detail::return_type::Result info(cl_device_id const & handle){ typedef typename detail::return_type::Result res_t; return detail::get_info_impl()(handle,param); } template typename detail::return_type::Result info(cl_mem const & handle){ typedef typename detail::return_type::Result res_t; return detail::get_info_impl()(handle,param); } template typename detail::return_type::Result info(cl_program const & handle){ typedef typename detail::return_type::Result res_t; return detail::get_info_impl()(handle,param); } // template // typename detail::return_type::Result info(cl_kernel const & handle){ // typedef typename detail::return_type::Result res_t; // return detail::get_info_impl()(handle,param); // } // template // typename detail::return_type::Result info(cl_kernel const & handle, cl_device_id const & 
handle2){ // typedef typename detail::return_type::Result res_t; // return detail::get_info_impl()(handle,handle2,param); // } template typename detail::return_type::Result info(cl_context const & handle){ typedef typename detail::return_type::Result res_t; return detail::get_info_impl()(handle,param); } template::type param> typename detail::return_type::Result info(OCL_TYPE const & handle){ return viennacl::ocl::info(handle.get()); } } } #endif // INFOS_HPP ViennaCL-1.5.1-src/viennacl/ocl/platform.hpp000644 001750 001750 00000011545 12267307531 020705 0ustar00rupprupp000000 000000 #ifndef VIENNACL_OCL_PLATFORM_HPP_ #define VIENNACL_OCL_PLATFORM_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/ocl/platform.hpp @brief Implements a OpenCL platform within ViennaCL */ #ifdef __APPLE__ #include #else #include #endif #include #include "viennacl/ocl/forwards.h" #include "viennacl/ocl/device.hpp" namespace viennacl { namespace ocl { /** @brief Wrapper class for an OpenCL platform. * * This class was written when the OpenCL C++ bindings haven't been standardized yet. * Regardless, it takes care about some additional details and is supposed to provide higher convenience. */ class platform { public: platform(vcl_size_t pf_index = 0) { cl_int err; cl_uint num_platforms; cl_platform_id ids[42]; //no more than 42 platforms supported... #if defined(VIENNACL_DEBUG_ALL) std::cout << "ViennaCL: Getting platform..." 
<< std::endl; #endif err = clGetPlatformIDs(42, ids, &num_platforms); VIENNACL_ERR_CHECK(err); assert(num_platforms > pf_index && bool("ViennaCL: ERROR: Not enough platforms found!")); id_ = ids[pf_index]; assert(num_platforms > 0 && bool("ViennaCL: ERROR: No platform found!")); } platform(cl_platform_id pf_id) : id_(pf_id) {} platform(platform const & other) : id_(other.id_) {} void operator=(cl_platform_id pf_id) { id_ = pf_id; } cl_platform_id id() const { return id_; } /** @brief Returns an information string */ std::string info() const { char buffer[1024]; cl_int err; err = clGetPlatformInfo(id_, CL_PLATFORM_VENDOR, 1024 * sizeof(char), buffer, NULL); VIENNACL_ERR_CHECK(err); std::stringstream ss; ss << buffer << ": "; err = clGetPlatformInfo(id_, CL_PLATFORM_VERSION, 1024 * sizeof(char), buffer, NULL); VIENNACL_ERR_CHECK(err); ss << buffer; return ss.str(); } //////////////////// get device ////////////////// /** @brief Returns the available devices of the supplied device type */ std::vector devices(cl_device_type dtype = CL_DEVICE_TYPE_DEFAULT) { cl_int err; #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_DEVICE) std::cout << "ViennaCL: Querying devices available at current platform." << std::endl; #endif cl_device_id device_ids[VIENNACL_OCL_MAX_DEVICE_NUM]; cl_uint num_devices; err = clGetDeviceIDs(id_, dtype, VIENNACL_OCL_MAX_DEVICE_NUM, device_ids, &num_devices); if (err == CL_DEVICE_NOT_FOUND && dtype == CL_DEVICE_TYPE_DEFAULT) { //workaround for ATI Stream SDK v2.3: No CPUs detected with default device type: err = clGetDeviceIDs(id_, CL_DEVICE_TYPE_CPU, VIENNACL_OCL_MAX_DEVICE_NUM, device_ids, &num_devices); } VIENNACL_ERR_CHECK(err); #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_DEVICE) std::cout << "ViennaCL: Found " << num_devices << " devices." 
<< std::endl; #endif assert(num_devices > 0 && bool("Error in viennacl::ocl::platform::devices(): No OpenCL devices available!")); std::vector devices; for (cl_uint i=0; i get_platforms() { std::vector< platform > ret; cl_int err; cl_uint num_platforms; cl_platform_id ids[42]; //no more than 42 platforms supported... #if defined(VIENNACL_DEBUG_ALL) std::cout << "ViennaCL: Getting platform..." << std::endl; #endif err = clGetPlatformIDs(42, ids, &num_platforms); VIENNACL_ERR_CHECK(err); for (cl_uint i = 0; i < num_platforms; ++i) ret.push_back( platform(ids[i]) ); return ret; } } } #endif ViennaCL-1.5.1-src/viennacl/ocl/enqueue.hpp000644 001750 001750 00000011403 12267307531 020521 0ustar00rupprupp000000 000000 #ifndef VIENNACL_OCL_ENQUEUE_HPP_ #define VIENNACL_OCL_ENQUEUE_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/ocl/enqueue.hpp @brief Enqueues kernels into command queues */ #ifdef __APPLE__ #include #else #include #endif #include "viennacl/ocl/backend.hpp" #include "viennacl/ocl/kernel.hpp" #include "viennacl/ocl/command_queue.hpp" #include "viennacl/ocl/context.hpp" namespace viennacl { namespace generator{ class custom_operation; void enqueue_custom_op(viennacl::generator::custom_operation & op, viennacl::ocl::command_queue const & queue); } namespace ocl { /** @brief Enqueues a kernel in the provided queue */ template void enqueue(KernelType & k, viennacl::ocl::command_queue const & queue) { // 1D kernel: if (k.local_work_size(1) == 0) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Starting 1D-kernel '" << k.name() << "'..." << std::endl; std::cout << "ViennaCL: Global work size: '" << k.global_work_size() << "'..." << std::endl; std::cout << "ViennaCL: Local work size: '" << k.local_work_size() << "'..." << std::endl; #endif vcl_size_t tmp_global = k.global_work_size(); vcl_size_t tmp_local = k.local_work_size(); cl_int err; if (tmp_global == 1 && tmp_local == 1) err = clEnqueueTask(queue.handle().get(), k.handle().get(), 0, NULL, NULL); else err = clEnqueueNDRangeKernel(queue.handle().get(), k.handle().get(), 1, NULL, &tmp_global, &tmp_local, 0, NULL, NULL); if (err != CL_SUCCESS) { std::cerr << "ViennaCL: FATAL ERROR: Kernel start failed for '" << k.name() << "'." << std::endl; std::cerr << "ViennaCL: Smaller work sizes could not solve the problem. 
" << std::endl; VIENNACL_ERR_CHECK(err); } } else //2D or 3D kernel { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Starting 2D/3D-kernel '" << k.name() << "'..." << std::endl; std::cout << "ViennaCL: Global work size: '" << k.global_work_size(0) << ", " << k.global_work_size(1) << ", " << k.global_work_size(2) << "'..." << std::endl; std::cout << "ViennaCL: Local work size: '" << k.local_work_size(0) << ", " << k.local_work_size(1) << ", " << k.local_work_size(2) << "'..." << std::endl; #endif vcl_size_t tmp_global[3]; tmp_global[0] = k.global_work_size(0); tmp_global[1] = k.global_work_size(1); tmp_global[2] = k.global_work_size(2); vcl_size_t tmp_local[3]; tmp_local[0] = k.local_work_size(0); tmp_local[1] = k.local_work_size(1); tmp_local[2] = k.local_work_size(2); cl_int err = clEnqueueNDRangeKernel(queue.handle().get(), k.handle().get(), (tmp_global[2] == 0) ? 2 : 3, NULL, tmp_global, tmp_local, 0, NULL, NULL); if (err != CL_SUCCESS) { //could not start kernel with any parameters std::cerr << "ViennaCL: FATAL ERROR: Kernel start failed for '" << k.name() << "'." << std::endl; VIENNACL_ERR_CHECK(err); } } #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) queue.finish(); std::cout << "ViennaCL: Kernel " << k.name() << " finished!" 
<< std::endl; #endif } //enqueue() /** @brief Convenience function that enqueues the provided kernel into the first queue of the currently active device in the currently active context */ template void enqueue(KernelType & k) { enqueue(k, k.context().get_queue()); } inline void enqueue(viennacl::generator::custom_operation & op, viennacl::ocl::command_queue const & queue) { generator::enqueue_custom_op(op,queue); } inline void enqueue(viennacl::generator::custom_operation & op) { enqueue(op, viennacl::ocl::current_context().get_queue()); } } // namespace ocl } // namespace viennacl #endif ViennaCL-1.5.1-src/viennacl/ocl/context.hpp000644 001750 001750 00000071072 12267307531 020546 0ustar00rupprupp000000 000000 #ifndef VIENNACL_OCL_CONTEXT_HPP_ #define VIENNACL_OCL_CONTEXT_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/ocl/context.hpp @brief Represents an OpenCL context within ViennaCL */ #ifdef __APPLE__ #include #else #include #endif #include #include #include #include "viennacl/ocl/forwards.h" #include "viennacl/ocl/handle.hpp" #include "viennacl/ocl/kernel.hpp" #include "viennacl/ocl/program.hpp" #include "viennacl/ocl/device.hpp" #include "viennacl/ocl/platform.hpp" #include "viennacl/ocl/command_queue.hpp" namespace viennacl { namespace ocl { /** @brief Manages an OpenCL context and provides the respective convenience functions for creating buffers, etc. 
* * This class was originally written before the OpenCL C++ bindings were standardized. * Regardless, it provides a couple of convience functionality which is not covered by the OpenCL C++ bindings. */ class context { typedef std::vector< viennacl::ocl::program > ProgramContainer; public: context() : initialized_(false), device_type_(CL_DEVICE_TYPE_DEFAULT), current_device_id_(0), default_device_num_(1), pf_index_(0), current_queue_id_(0) {} //////// Get and set default number of devices per context */ /** @brief Returns the maximum number of devices to be set up for the context */ vcl_size_t default_device_num() const { return default_device_num_; } /** @brief Sets the maximum number of devices to be set up for the context */ void default_device_num(vcl_size_t new_num) { default_device_num_ = new_num; } ////////// get and set preferred device type ///////////////////// /** @brief Returns the default device type for the context */ cl_device_type default_device_type() { return device_type_; } /** @brief Sets the device type for this context */ void default_device_type(cl_device_type dtype) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Setting new device type for context " << h_ << std::endl; #endif if (!initialized_) device_type_ = dtype; //assume that the user provided a correct value } //////////////////// get devices ////////////////// /** @brief Returns a vector with all devices in this context */ std::vector const & devices() const { return devices_; } /** @brief Returns the current device */ viennacl::ocl::device const & current_device() const { //std::cout << "Current device id in context: " << current_device_id_ << std::endl; return devices_[current_device_id_]; } /** @brief Switches the current device to the i-th device in this context */ void switch_device(vcl_size_t i) { assert(i < devices_.size() && bool("Provided device index out of range!")); current_device_id_ = i; } /** @brief If the supplied device is 
used within the context, it becomes the current active device. */ void switch_device(viennacl::ocl::device const & d) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Setting new current device for context " << h_ << std::endl; #endif bool found = false; for (vcl_size_t i=0; i directly, or make sure that you free to memory manually if you no longer need the allocated memory. */ cl_mem create_memory_without_smart_handle(cl_mem_flags flags, unsigned int size, void * ptr = NULL) const { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Creating memory of size " << size << " for context " << h_ << " (unsafe, returning cl_mem directly)" << std::endl; #endif if (ptr) flags |= CL_MEM_COPY_HOST_PTR; cl_int err; cl_mem mem = clCreateBuffer(h_.get(), flags, size, ptr, &err); VIENNACL_ERR_CHECK(err); return mem; } /** @brief Creates a memory buffer within the context * * @param flags OpenCL flags for the buffer creation * @param size Size of the memory buffer in bytes * @param ptr Optional pointer to CPU memory, with which the OpenCL memory should be initialized */ viennacl::ocl::handle create_memory(cl_mem_flags flags, unsigned int size, void * ptr = NULL) const { return viennacl::ocl::handle(create_memory_without_smart_handle(flags, size, ptr), *this); } /** @brief Creates a memory buffer within the context initialized from the supplied data * * @param flags OpenCL flags for the buffer creation * @param buffer A vector (STL vector, ublas vector, etc.) 
*/ template < typename SCALARTYPE, typename A, template class VectorType > viennacl::ocl::handle create_memory(cl_mem_flags flags, const VectorType & buffer) const { return viennacl::ocl::handle(create_memory_without_smart_handle(flags, static_cast(sizeof(SCALARTYPE) * buffer.size()), (void*)&buffer[0]), *this); } //////////////////// create queues //////////////////////////////// /** @brief Adds an existing queue for the given device to the context */ void add_queue(cl_device_id dev, cl_command_queue q) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Adding existing queue " << q << " for device " << dev << " to context " << h_ << std::endl; #endif viennacl::ocl::handle queue_handle(q, *this); queues_[dev].push_back(viennacl::ocl::command_queue(queue_handle)); queues_[dev].back().handle().inc(); } /** @brief Adds a queue for the given device to the context */ void add_queue(cl_device_id dev) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Adding new queue for device " << dev << " to context " << h_ << std::endl; #endif cl_int err; #ifdef VIENNACL_PROFILING_ENABLED viennacl::ocl::handle temp(clCreateCommandQueue(h_.get(), dev, CL_QUEUE_PROFILING_ENABLE, &err), *this); #else viennacl::ocl::handle temp(clCreateCommandQueue(h_.get(), dev, 0, &err), *this); #endif VIENNACL_ERR_CHECK(err); queues_[dev].push_back(viennacl::ocl::command_queue(temp)); } /** @brief Adds a queue for the given device to the context */ void add_queue(viennacl::ocl::device d) { add_queue(d.id()); } //get queue for default device: viennacl::ocl::command_queue & get_queue() { return queues_[devices_[current_device_id_].id()][current_queue_id_]; } viennacl::ocl::command_queue const & get_queue() const { typedef std::map< cl_device_id, std::vector > QueueContainer; // find queue: QueueContainer::const_iterator it = queues_.find(devices_[current_device_id_].id()); if (it != queues_.end()) return 
(it->second)[current_queue_id_]; std::cerr << "ViennaCL: FATAL ERROR: Could not obtain current command queue!" << std::endl; std::cout << "Number of queues in context: " << queues_.size() << std::endl; std::cout << "Number of devices in context: " << devices_.size() << std::endl; throw "queue not found!"; //return (it->second)[current_queue_id_]; } //get a particular queue: /** @brief Returns the queue with the provided index for the given device */ viennacl::ocl::command_queue & get_queue(cl_device_id dev, vcl_size_t i = 0) { assert(i < queues_.size() && bool("In class 'context': id invalid in get_queue()")); #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Getting queue " << i << " for device " << dev << " in context " << h_ << std::endl; #endif unsigned int device_index; for (device_index = 0; device_index < devices_.size(); ++device_index) { if (devices_[device_index] == dev) break; } assert(device_index < devices_.size() && bool("Device not within context")); return queues_[devices_[device_index].id()][i]; } /** @brief Returns the current device */ // TODO: work out the const issues viennacl::ocl::command_queue const & current_queue() //const { return queues_[devices_[current_device_id_].id()][current_queue_id_]; } /** @brief Switches the current device to the i-th device in this context */ void switch_queue(vcl_size_t i) { assert(i < queues_[devices_[current_device_id_].id()].size() && bool("In class 'context': Provided queue index out of range for device!")); current_queue_id_ = i; } #if 1 /** @brief If the supplied command_queue is used within the context, it becomes the current active command_queue, the command_queue's device becomes current active device. 
*/ void switch_queue(viennacl::ocl::command_queue const & q) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Setting new current queue for context " << h_ << std::endl; #endif bool found = false; typedef std::map< cl_device_id, std::vector > QueueContainer; // For each device: vcl_size_t j = 0; for (QueueContainer::const_iterator it=queues_.begin(); it != queues_.end(); it++,j++) { const std::vector & qv = (it->second); // For each queue candidate for (vcl_size_t i=0; iname() == name){ programs_.erase(it); return; } } } /** @brief Returns the program with the provided name */ viennacl::ocl::program & get_program(std::string const & name) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Getting program '" << name << "' from context " << h_ << std::endl; #endif for (ProgramContainer::iterator it = programs_.begin(); it != programs_.end(); ++it) { if (it->name() == name) return *it; } std::cerr << "Could not find program '" << name << "'" << std::endl; throw "In class 'context': name invalid in get_program()"; //return programs_[0]; //return a defined object } viennacl::ocl::program const & get_program(std::string const & name) const { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Getting program '" << name << "' from context " << h_ << std::endl; #endif for (ProgramContainer::const_iterator it = programs_.begin(); it != programs_.end(); ++it) { if (it->name() == name) return *it; } std::cerr << "Could not find program '" << name << "'" << std::endl; throw "In class 'context': name invalid in get_program()"; //return programs_[0]; //return a defined object } /** @brief Returns whether the program with the provided name exists or not */ bool has_program(std::string const & name){ for (ProgramContainer::iterator it = programs_.begin(); it != programs_.end(); ++it) { if (it->name() == name) return true; } return false; } /** @brief Returns 
the program with the provided id */ viennacl::ocl::program & get_program(vcl_size_t id) { assert(id < programs_.size() && bool("In class 'context': id invalid in get_program()")); return programs_[id]; } /** @brief Returns the number of programs within this context. Const, so it can also be called on a const context. */ vcl_size_t program_num() const { return programs_.size(); } /** @brief Convenience function for retrieving the kernel of a program directly from the context */ viennacl::ocl::kernel & get_kernel(std::string const & program_name, std::string const & kernel_name) { return get_program(program_name).get_kernel(kernel_name); } /** @brief Returns the number of devices within this context. Const, so it can also be called on a const context. */ vcl_size_t device_num() const { return devices_.size(); } /** @brief Returns the context handle */ const viennacl::ocl::handle & handle() const { return h_; } /** @brief Returns the current build option string */ std::string build_options() const { return build_options_; } /** @brief Sets the build option string, which is passed to the OpenCL compiler in subsequent compilations. Does not affect programs already compiled previously. */ void build_options(std::string op) { build_options_ = op; } /** @brief Returns the platform ID of the platform to be used for the context */ vcl_size_t platform_index() const { return pf_index_; } /** @brief Sets the platform ID of the platform to be used for the context. Must be called before the context is initialized (checked by an assertion). */ void platform_index(vcl_size_t new_index) { assert(!initialized_ && bool("Platform ID must be set before context is initialized!")); pf_index_ = new_index; } /** @brief Less-than comparable for compatibility with std::map. Compares the raw context handles. */ bool operator<(context const & other) const { return h_.get() < other.h_.get(); } /** @brief Two contexts are equal if their raw context handles are equal. */ bool operator==(context const & other) const { return h_.get() == other.h_.get(); } private: /** @brief Initialize a new context. 
Reuse any previously supplied information (devices, queues) */ void init_new() { assert(!initialized_ && bool("ViennaCL FATAL error: Context already created!")); #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Initializing new ViennaCL context." << std::endl; #endif cl_int err; std::vector device_id_array; if (devices_.empty()) //get the default device if user has not yet specified a list of devices { //create an OpenCL context for the provided devices: #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Setting all devices for context..." << std::endl; #endif platform pf(pf_index_); std::vector devices = pf.devices(device_type_); #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Number of devices for context: " << devices.size() << std::endl; #endif vcl_size_t device_num = std::min(default_device_num_, devices.size()); for (vcl_size_t i=0; i::const_iterator iter = devices_.begin(); iter != devices_.end(); ++iter) device_id_array.push_back(iter->id()); h_ = clCreateContext(0, static_cast(devices_.size()), &(device_id_array[0]), NULL, NULL, &err); VIENNACL_ERR_CHECK(err); initialized_ = true; #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Initialization of new ViennaCL context done." << std::endl; #endif } /** @brief Reuses a supplied context. */ void init_existing(cl_context c) { assert(!initialized_ && bool("ViennaCL FATAL error: Context already created!")); #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Initialization of ViennaCL context from existing context." << std::endl; #endif //set context handle: h_ = c; h_.inc(); // if the user provides the context, then the user will also call release() on the context. Without inc(), we would get a seg-fault due to double-free at program termination. 
if (devices_.empty()) { //get devices for context: cl_int err; cl_uint num_devices; vcl_size_t temp; //Note: The obvious // err = clGetContextInfo(h_, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &num_devices, NULL); //does not work with NVIDIA OpenCL stack! err = clGetContextInfo(h_.get(), CL_CONTEXT_DEVICES, VIENNACL_OCL_MAX_DEVICE_NUM * sizeof(cl_device_id), NULL, &temp); VIENNACL_ERR_CHECK(err); assert(temp > 0 && bool("ViennaCL: FATAL error: Provided context does not contain any devices!")); num_devices = static_cast(temp / sizeof(cl_device_id)); #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Reusing context with " << num_devices << " devices." << std::endl; #endif std::vector device_ids(num_devices); err = clGetContextInfo(h_.get(), CL_CONTEXT_DEVICES, num_devices * sizeof(cl_device_id), &(device_ids[0]), NULL); VIENNACL_ERR_CHECK(err); for (vcl_size_t i=0; i h_; std::vector< viennacl::ocl::device > devices_; vcl_size_t current_device_id_; vcl_size_t default_device_num_; ProgramContainer programs_; std::map< cl_device_id, std::vector< viennacl::ocl::command_queue> > queues_; std::string build_options_; vcl_size_t pf_index_; vcl_size_t current_queue_id_; }; //context /** @brief Adds a kernel to the program */ inline viennacl::ocl::kernel & viennacl::ocl::program::add_kernel(cl_kernel kernel_handle, std::string const & kernel_name) { assert(p_context_ != NULL && bool("Pointer to context invalid in viennacl::ocl::program object")); viennacl::ocl::kernel temp(kernel_handle, *this, *p_context_, kernel_name); kernels_.push_back(temp); return kernels_.back(); } /** @brief Returns the kernel with the provided name */ inline viennacl::ocl::kernel & viennacl::ocl::program::get_kernel(std::string const & name) { //std::cout << "Requiring kernel " << name << " from program " << name_ << std::endl; for (KernelContainer::iterator it = kernels_.begin(); it != kernels_.end(); ++it) { if (it->name() == name) return *it; } std::cerr << 
"ViennaCL: FATAL ERROR: Could not find kernel '" << name << "' from program '" << name_ << "'" << std::endl; std::cout << "Number of kernels in program: " << kernels_.size() << std::endl; throw "Kernel not found"; //return kernels_[0]; //return a defined object } inline void viennacl::ocl::kernel::set_work_size_defaults() { assert( p_program_ != NULL && bool("Kernel not initialized, program pointer invalid.")); assert( p_context_ != NULL && bool("Kernel not initialized, context pointer invalid.")); if ( (p_context_->current_device().type() == CL_DEVICE_TYPE_GPU) || (p_context_->current_device().type() == CL_DEVICE_TYPE_ACCELERATOR) // Xeon Phi ) { local_work_size_[0] = 128; local_work_size_[1] = 0; local_work_size_[2] = 0; global_work_size_[0] = 128*128; global_work_size_[1] = 0; global_work_size_[2] = 0; } else //assume CPU type: { //conservative assumption: one thread per CPU core: local_work_size_[0] = 1; local_work_size_[1] = 0; local_work_size_[2] = 0; size_type units = p_context_->current_device().max_compute_units(); size_type s = 1; while (s < units) // find next power of 2. Important to make reductions work on e.g. six-core CPUs. s *= 2; global_work_size_[0] = s; global_work_size_[1] = 0; global_work_size_[2] = 0; } } } } #endif ViennaCL-1.5.1-src/viennacl/ocl/device.hpp000644 001750 001750 00000177015 12267307531 020325 0ustar00rupprupp000000 000000 #ifndef VIENNACL_OCL_DEVICE_HPP_ #define VIENNACL_OCL_DEVICE_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/ocl/device.hpp @brief Represents an OpenCL device within ViennaCL */ #ifdef __APPLE__ #include #else #include #endif #include #include #include #include #include #include "viennacl/ocl/device_utils.hpp" #include "viennacl/ocl/handle.hpp" #include "viennacl/ocl/error.hpp" namespace viennacl { namespace ocl { /** @brief A class representing a compute device (e.g. a GPU) * */ class device { public: explicit device() : device_(0) { flush_cache(); } explicit device(cl_device_id dev) : device_(dev) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_DEVICE) std::cout << "ViennaCL: Creating device object (CTOR with cl_device_id)" << std::endl; #endif flush_cache(); } device(const device & other) : device_(0) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_DEVICE) std::cout << "ViennaCL: Creating device object (Copy CTOR)" << std::endl; #endif if (device_ != other.device_) { device_ = other.device_; flush_cache(); } } /** @brief The default compute device address space size specified as an unsigned integer value in bits. Currently supported values are 32 or 64 bits. */ cl_uint address_bits() const { if (!address_bits_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint), static_cast(&address_bits_), NULL); VIENNACL_ERR_CHECK(err); address_bits_valid_ = true; } return address_bits_; } /** @brief Is CL_TRUE if the device is available and CL_FALSE if the device is not available. 
*/ cl_bool available() const { if (!available_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_AVAILABLE, sizeof(cl_bool), static_cast(&available_), NULL); VIENNACL_ERR_CHECK(err); available_valid_ = true; } return available_; } /** @brief Is CL_FALSE if the implementation does not have a compiler available to compile the program source. Is CL_TRUE if the compiler is available. This can be CL_FALSE for the embedded platform profile only. The value is queried via clGetDeviceInfo() on first use and cached afterwards. */ cl_bool compiler_available() const { if (!compiler_available_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_COMPILER_AVAILABLE , sizeof(cl_bool), static_cast(&compiler_available_), NULL); VIENNACL_ERR_CHECK(err); compiler_available_valid_ = true; } return compiler_available_; } #ifdef CL_DEVICE_DOUBLE_FP_CONFIG /** @brief Describes the OPTIONAL double precision floating-point capability of the OpenCL device. Only compiled if CL_DEVICE_DOUBLE_FP_CONFIG is defined. * * This is a bit-field that describes one or more of the following values: * CL_FP_DENORM - denorms are supported. * CL_FP_INF_NAN - INF and NaNs are supported. * CL_FP_ROUND_TO_NEAREST - round to nearest even rounding mode supported. * CL_FP_ROUND_TO_ZERO - round to zero rounding mode supported. * CL_FP_ROUND_TO_INF - round to +ve and -ve infinity rounding modes supported. * CL_FP_FMA - IEEE754-2008 fused multiply-add is supported. * * The mandated minimum double precision floating-point capability is * CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM. */ cl_device_fp_config double_fp_config() const { if (!double_fp_config_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(cl_device_fp_config), static_cast(&double_fp_config_), NULL); VIENNACL_ERR_CHECK(err); double_fp_config_valid_ = true; } return double_fp_config_; } #endif /** @brief Is CL_TRUE if the OpenCL device is a little endian device and CL_FALSE otherwise. 
*/ cl_bool endian_little() const { if (!endian_little_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_ENDIAN_LITTLE, sizeof(cl_bool), static_cast(&endian_little_), NULL); VIENNACL_ERR_CHECK(err); endian_little_valid_ = true; } return endian_little_; } /** @brief Is CL_TRUE if the device implements error correction for all accesses to compute device memory (global and constant) and CL_FALSE otherwise. The value is queried via clGetDeviceInfo() on first use and cached afterwards. */ cl_bool error_correction_support() const { if (!error_correction_support_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_ERROR_CORRECTION_SUPPORT , sizeof(cl_bool), static_cast(&error_correction_support_), NULL); VIENNACL_ERR_CHECK(err); error_correction_support_valid_ = true; } return error_correction_support_; } /** @brief Describes the execution capabilities of the device. * * This is a bit-field that describes one or more of the following values: * CL_EXEC_KERNEL - The OpenCL device can execute OpenCL kernels. * CL_EXEC_NATIVE_KERNEL - The OpenCL device can execute native kernels. * The mandated minimum capability is CL_EXEC_KERNEL. * * The value is queried via clGetDeviceInfo() on first use and cached afterwards. */ cl_device_exec_capabilities execution_capabilities() const { if (!execution_capabilities_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_EXECUTION_CAPABILITIES , sizeof(cl_device_exec_capabilities), static_cast(&execution_capabilities_), NULL); VIENNACL_ERR_CHECK(err); execution_capabilities_valid_ = true; } return execution_capabilities_; } /** @brief Returns a space-separated list of extension names (the extension names themselves do not contain any spaces). 
* * The list of extension names returned currently can include one or more of the following approved extension names: * cl_khr_fp64 * cl_khr_int64_base_atomics * cl_khr_int64_extended_atomics * cl_khr_fp16 * cl_khr_gl_sharing * cl_khr_gl_event * cl_khr_d3d10_sharing * * The query passes a buffer size of 2048 chars to clGetDeviceInfo(); the result is cached after the first call. */ std::string extensions() const { if (!extensions_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_EXTENSIONS, sizeof(char) * 2048, static_cast(&extensions_), NULL); VIENNACL_ERR_CHECK(err); extensions_valid_ = true; } return extensions_; } /** @brief Size of global memory cache in bytes. */ cl_ulong global_mem_cache_size() const { if (!global_mem_cache_size_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, sizeof(cl_ulong), static_cast(&global_mem_cache_size_), NULL); VIENNACL_ERR_CHECK(err); global_mem_cache_size_valid_ = true; } return global_mem_cache_size_; } /** @brief Type of global memory cache supported. Valid values are: CL_NONE, CL_READ_ONLY_CACHE, and CL_READ_WRITE_CACHE. */ cl_device_mem_cache_type global_mem_cache_type() const { if (!global_mem_cache_type_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, sizeof(cl_device_mem_cache_type), static_cast(&global_mem_cache_type_), NULL); VIENNACL_ERR_CHECK(err); global_mem_cache_type_valid_ = true; } return global_mem_cache_type_; } /** @brief Size of a global memory cache line in bytes (CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE). */ cl_uint global_mem_cacheline_size() const { if (!global_mem_cacheline_size_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cl_uint), static_cast(&global_mem_cacheline_size_), NULL); VIENNACL_ERR_CHECK(err); global_mem_cacheline_size_valid_ = true; } return global_mem_cacheline_size_; } /** @brief Size of global memory in bytes. 
*/ cl_ulong global_mem_size() const { if (!global_mem_size_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), static_cast(&global_mem_size_), NULL); VIENNACL_ERR_CHECK(err); global_mem_size_valid_ = true; } return global_mem_size_; } #ifdef CL_DEVICE_HALF_FP_CONFIG /** @brief Describes the OPTIONAL half precision floating-point capability of the OpenCL device. Only compiled if CL_DEVICE_HALF_FP_CONFIG is defined. * * This is a bit-field that describes one or more of the following values: * CL_FP_DENORM - denorms are supported. * CL_FP_INF_NAN - INF and NaNs are supported. * CL_FP_ROUND_TO_NEAREST - round to nearest even rounding mode supported. * CL_FP_ROUND_TO_ZERO - round to zero rounding mode supported. * CL_FP_ROUND_TO_INF - round to +ve and -ve infinity rounding modes supported. * CL_FP_FMA - IEEE754-2008 fused multiply-add is supported. * * The required minimum half precision floating-point capability as implemented by this extension is CL_FP_ROUND_TO_ZERO or CL_FP_ROUND_TO_INF | CL_FP_INF_NAN. */ cl_device_fp_config half_fp_config() const { if (!half_fp_config_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_HALF_FP_CONFIG, sizeof(cl_device_fp_config), static_cast(&half_fp_config_), NULL); VIENNACL_ERR_CHECK(err); half_fp_config_valid_ = true; } return half_fp_config_; } #endif /** @brief Is CL_TRUE if the device and the host have a unified memory subsystem and is CL_FALSE otherwise. The value is queried via clGetDeviceInfo() on first use and cached afterwards. */ cl_bool host_unified_memory() const { if (!host_unified_memory_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(cl_bool), static_cast(&host_unified_memory_), NULL); VIENNACL_ERR_CHECK(err); host_unified_memory_valid_ = true; } return host_unified_memory_; } /** @brief Is CL_TRUE if images are supported by the OpenCL device and CL_FALSE otherwise. 
*/ cl_bool image_support() const { if (!image_support_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), static_cast(&image_support_), NULL); VIENNACL_ERR_CHECK(err); image_support_valid_ = true; } return image_support_; } /** @brief Max height of 2D image in pixels. The minimum value is 8192 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE. */ size_t image2d_max_height() const { if (!image2d_max_height_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), static_cast(&image2d_max_height_), NULL); VIENNACL_ERR_CHECK(err); image2d_max_height_valid_ = true; } return image2d_max_height_; } /** @brief Max width of 2D image in pixels. The minimum value is 8192 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE. */ size_t image2d_max_width() const { if (!image2d_max_width_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), static_cast(&image2d_max_width_), NULL); VIENNACL_ERR_CHECK(err); image2d_max_width_valid_ = true; } return image2d_max_width_; } /** @brief Max depth of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE. */ size_t image3d_max_depth() const { if (!image3d_max_depth_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), static_cast(&image3d_max_depth_), NULL); VIENNACL_ERR_CHECK(err); image3d_max_depth_valid_ = true; } return image3d_max_depth_; } /** @brief Max height of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE. */ size_t image3d_max_height() const { if (!image3d_max_height_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), static_cast(&image3d_max_height_), NULL); VIENNACL_ERR_CHECK(err); image3d_max_height_valid_ = true; } return image3d_max_height_; } /** @brief Max width of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE. 
*/ size_t image3d_max_width() const { if (!image3d_max_width_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), static_cast(&image3d_max_width_), NULL); VIENNACL_ERR_CHECK(err); image3d_max_width_valid_ = true; } return image3d_max_width_; } /** @brief Size of local memory arena in bytes. The minimum value is 32 KB. */ cl_ulong local_mem_size() const { if (!local_mem_size_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), static_cast(&local_mem_size_), NULL); VIENNACL_ERR_CHECK(err); local_mem_size_valid_ = true; } return local_mem_size_; } /** @brief Type of local memory supported. This can be set to CL_LOCAL implying dedicated local memory storage such as SRAM, or CL_GLOBAL. */ cl_device_local_mem_type local_mem_type() const { if (!local_mem_type_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(cl_device_local_mem_type), static_cast(&local_mem_type_), NULL); VIENNACL_ERR_CHECK(err); local_mem_type_valid_ = true; } return local_mem_type_; } /** @brief Maximum configured clock frequency of the device in MHz. */ cl_uint max_clock_frequency() const { if (!max_clock_frequency_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(cl_uint), static_cast(&max_clock_frequency_), NULL); VIENNACL_ERR_CHECK(err); max_clock_frequency_valid_ = true; } return max_clock_frequency_; } /** @brief The number of parallel compute cores on the OpenCL device. The minimum value is 1. */ cl_uint max_compute_units() const { if (!max_compute_units_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), static_cast(&max_compute_units_), NULL); VIENNACL_ERR_CHECK(err); max_compute_units_valid_ = true; } return max_compute_units_; } /** @brief Max number of arguments declared with the __constant qualifier in a kernel. The minimum value is 8. 
*/ cl_uint max_constant_args() const { if (!max_constant_args_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_CONSTANT_ARGS, sizeof(cl_uint), static_cast(&max_constant_args_), NULL); VIENNACL_ERR_CHECK(err); max_constant_args_valid_ = true; } return max_constant_args_; } /** @brief Max size in bytes of a constant buffer allocation. The minimum value is 64 KB. */ cl_ulong max_constant_buffer_size() const { if (!max_constant_buffer_size_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(cl_ulong), static_cast(&max_constant_buffer_size_), NULL); VIENNACL_ERR_CHECK(err); max_constant_buffer_size_valid_ = true; } return max_constant_buffer_size_; } /** @brief Max size of memory object allocation in bytes. The minimum value is max(1/4th of CL_DEVICE_GLOBAL_MEM_SIZE, 128*1024*1024) */ cl_ulong max_mem_alloc_size() const { if (!max_mem_alloc_size_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), static_cast(&max_mem_alloc_size_), NULL); VIENNACL_ERR_CHECK(err); max_mem_alloc_size_valid_ = true; } return max_mem_alloc_size_; } /** @brief Max size in bytes of the arguments that can be passed to a kernel. The minimum value is 1024. * * For this minimum value, only a maximum of 128 arguments can be passed to a kernel. */ size_t max_parameter_size() const { if (!max_parameter_size_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof(size_t), static_cast(&max_parameter_size_), NULL); VIENNACL_ERR_CHECK(err); max_parameter_size_valid_ = true; } return max_parameter_size_; } /** @brief Max number of simultaneous image objects that can be read by a kernel. The minimum value is 128 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE. 
*/ cl_uint max_read_image_args() const { if (!max_read_image_args_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(cl_uint), static_cast(&max_read_image_args_), NULL); VIENNACL_ERR_CHECK(err); max_read_image_args_valid_ = true; } return max_read_image_args_; } /** @brief Max number of simultaneous image objects that can be read by a kernel. The minimum value is 128 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE. */ cl_uint max_samplers() const { if (!max_samplers_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_SAMPLERS, sizeof(cl_uint), static_cast(&max_samplers_), NULL); VIENNACL_ERR_CHECK(err); max_samplers_valid_ = true; } return max_samplers_; } /** @brief Maximum number of work-items in a work-group executing a kernel using the data parallel execution model. The minimum value is 1. */ size_t max_work_group_size() const { if (!max_work_group_size_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), static_cast(&max_work_group_size_), NULL); VIENNACL_ERR_CHECK(err); max_work_group_size_valid_ = true; } return max_work_group_size_; } /** @brief Maximum dimensions that specify the global and local work-item IDs used by the data parallel execution model. The minimum value is 3. */ cl_uint max_work_item_dimensions() const { if (!max_work_item_dimensions_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), static_cast(&max_work_item_dimensions_), NULL); VIENNACL_ERR_CHECK(err); max_work_item_dimensions_valid_ = true; } return max_work_item_dimensions_; } /** @brief Maximum number of work-items that can be specified in each dimension of the work-group. * * Returns n size_t entries, where n is the value returned by the query for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS. The minimum value is (1, 1, 1). 
*/ std::vector max_work_item_sizes() const { std::vector result(max_work_item_dimensions()); assert(result.size() < 16 && bool("Supported work item dimensions exceed available capacity!")); if (!max_work_item_sizes_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 16, static_cast(&max_work_item_sizes_), NULL); VIENNACL_ERR_CHECK(err); max_work_item_sizes_valid_ = true; } for (vcl_size_t i=0; i(&max_write_image_args_), NULL); VIENNACL_ERR_CHECK(err); max_write_image_args_valid_ = true; } return max_write_image_args_; } /** @brief Describes the alignment in bits of the base address of any allocated memory object. */ cl_uint mem_base_addr_align() const { if (!mem_base_addr_align_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), static_cast(&mem_base_addr_align_), NULL); VIENNACL_ERR_CHECK(err); mem_base_addr_align_valid_ = true; } return mem_base_addr_align_; } /** @brief The smallest alignment in bytes which can be used for any data type. */ cl_uint min_data_type_align_size() const { if (!min_data_type_align_size_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, sizeof(cl_uint), static_cast(&min_data_type_align_size_), NULL); VIENNACL_ERR_CHECK(err); min_data_type_align_size_valid_ = true; } return min_data_type_align_size_; } /** @brief Device name string. */ std::string name() const { if (!name_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NAME, sizeof(char) * 256, static_cast(name_), NULL); VIENNACL_ERR_CHECK(err); name_valid_ = true; } return name_; } /** @brief Device architecture family. */ device_architecture_family architecture_family() const { if( !architecture_family_valid_) { architecture_family_ = get_device_architecture(vendor_id(), name()); architecture_family_valid_ = true; } return architecture_family_; } /** @brief Returns the native ISA vector width. 
The vector width is defined as the number of scalar elements that can be stored in the vector. */
cl_uint native_vector_width_char() const
{
  // Queried from the OpenCL runtime on first use only; later calls return the cached value.
  if (!native_vector_width_char_valid_)
  {
    cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, sizeof(cl_uint), static_cast<void *>(&native_vector_width_char_), NULL);
    VIENNACL_ERR_CHECK(err);
    native_vector_width_char_valid_ = true;
  }
  return native_vector_width_char_;
}

/** @brief Returns the native ISA vector width. The vector width is defined as the number of scalar elements that can be stored in the vector. */
cl_uint native_vector_width_short() const
{
  if (!native_vector_width_short_valid_)
  {
    cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, sizeof(cl_uint), static_cast<void *>(&native_vector_width_short_), NULL);
    VIENNACL_ERR_CHECK(err);
    native_vector_width_short_valid_ = true;
  }
  return native_vector_width_short_;
}

/** @brief Returns the native ISA vector width. The vector width is defined as the number of scalar elements that can be stored in the vector. */
cl_uint native_vector_width_int() const
{
  if (!native_vector_width_int_valid_)
  {
    cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, sizeof(cl_uint), static_cast<void *>(&native_vector_width_int_), NULL);
    VIENNACL_ERR_CHECK(err);
    native_vector_width_int_valid_ = true;
  }
  return native_vector_width_int_;
}

/** @brief Returns the native ISA vector width. The vector width is defined as the number of scalar elements that can be stored in the vector. */
cl_uint native_vector_width_long() const
{
  if (!native_vector_width_long_valid_)
  {
    cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof(cl_uint), static_cast<void *>(&native_vector_width_long_), NULL);
    VIENNACL_ERR_CHECK(err);
    native_vector_width_long_valid_ = true;
  }
  return native_vector_width_long_;
}

/** @brief Returns the native ISA vector width. The vector width is defined as the number of scalar elements that can be stored in the vector.
*/
cl_uint native_vector_width_float() const
{
  // Queried from the OpenCL runtime on first use only; later calls return the cached value.
  if (!native_vector_width_float_valid_)
  {
    cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), static_cast<void *>(&native_vector_width_float_), NULL);
    VIENNACL_ERR_CHECK(err);
    native_vector_width_float_valid_ = true;
  }
  return native_vector_width_float_;
}

/** @brief Returns the native ISA vector width. The vector width is defined as the number of scalar elements that can be stored in the vector.
*
* If the cl_khr_fp64 extension is not supported, this function returns 0.
*/
cl_uint native_vector_width_double() const
{
  if (!native_vector_width_double_valid_)
  {
    cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), static_cast<void *>(&native_vector_width_double_), NULL);
    VIENNACL_ERR_CHECK(err);
    native_vector_width_double_valid_ = true;
  }
  return native_vector_width_double_;
}

/** @brief Returns the native ISA vector width. The vector width is defined as the number of scalar elements that can be stored in the vector.
*
* If the cl_khr_fp16 extension is not supported, this function returns 0.
*/
cl_uint native_vector_width_half() const
{
  if (!native_vector_width_half_valid_)
  {
    cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, sizeof(cl_uint), static_cast<void *>(&native_vector_width_half_), NULL);
    VIENNACL_ERR_CHECK(err);
    native_vector_width_half_valid_ = true;
  }
  return native_vector_width_half_;
}

/** @brief OpenCL C version string. Returns the highest OpenCL C version supported by the compiler for this device.
*
* This version string has the following format:
*   OpenCL[space]C[space][major_version.minor_version][space][vendor-specific information]
* The major_version.minor_version value must be 1.1 if CL_DEVICE_VERSION is OpenCL 1.1.
* The major_version.minor_version value returned can be 1.0 or 1.1 if CL_DEVICE_VERSION is OpenCL 1.0.
* If OpenCL C 1.1 is returned, this implies that the language feature set defined in section 6 of the OpenCL 1.1 specification is supported by the OpenCL 1.0 device. */ std::string opencl_c_version() const { if (!opencl_c_version_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_OPENCL_C_VERSION, sizeof(char) * 128, static_cast(opencl_c_version_), NULL); VIENNACL_ERR_CHECK(err); opencl_c_version_valid_ = true; } return opencl_c_version_; } /** @brief The platform associated with this device. */ cl_platform_id platform() const { if (!platform_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), static_cast(&platform_), NULL); VIENNACL_ERR_CHECK(err); platform_valid_ = true; } return platform_; } /** @brief Preferred native vector width size for built-in scalar types that can be put into vectors. The vector width is defined as the number of scalar elements that can be stored in the vector. */ cl_uint preferred_vector_width_char() const { if (!preferred_vector_width_char_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), static_cast(&preferred_vector_width_char_), NULL); VIENNACL_ERR_CHECK(err); preferred_vector_width_char_valid_ = true; } return preferred_vector_width_char_; } /** @brief Preferred native vector width size for built-in scalar types that can be put into vectors. The vector width is defined as the number of scalar elements that can be stored in the vector. */ cl_uint preferred_vector_width_short() const { if (!preferred_vector_width_short_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), static_cast(&preferred_vector_width_short_), NULL); VIENNACL_ERR_CHECK(err); preferred_vector_width_short_valid_ = true; } return preferred_vector_width_short_; } /** @brief Preferred native vector width size for built-in scalar types that can be put into vectors. 
The vector width is defined as the number of scalar elements that can be stored in the vector. */
cl_uint preferred_vector_width_int() const
{
  // Queried from the OpenCL runtime on first use only; later calls return the cached value.
  if (!preferred_vector_width_int_valid_)
  {
    cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_int_), NULL);
    VIENNACL_ERR_CHECK(err);
    preferred_vector_width_int_valid_ = true;
  }
  return preferred_vector_width_int_;
}

/** @brief Preferred native vector width size for built-in scalar types that can be put into vectors. The vector width is defined as the number of scalar elements that can be stored in the vector. */
cl_uint preferred_vector_width_long() const
{
  if (!preferred_vector_width_long_valid_)
  {
    cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_long_), NULL);
    VIENNACL_ERR_CHECK(err);
    preferred_vector_width_long_valid_ = true;
  }
  return preferred_vector_width_long_;
}

/** @brief Preferred native vector width size for built-in scalar types that can be put into vectors. The vector width is defined as the number of scalar elements that can be stored in the vector. */
cl_uint preferred_vector_width_float() const
{
  if (!preferred_vector_width_float_valid_)
  {
    cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_float_), NULL);
    VIENNACL_ERR_CHECK(err);
    preferred_vector_width_float_valid_ = true;
  }
  return preferred_vector_width_float_;
}

/** @brief Preferred native vector width size for built-in scalar types that can be put into vectors. The vector width is defined as the number of scalar elements that can be stored in the vector.
*
* If the cl_khr_fp64 extension is not supported, this function returns 0.
*/ cl_uint preferred_vector_width_double() const { if (!preferred_vector_width_double_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), static_cast(&preferred_vector_width_double_), NULL); VIENNACL_ERR_CHECK(err); preferred_vector_width_double_valid_ = true; } return preferred_vector_width_double_; } /** @brief Preferred native vector width size for built-in scalar types that can be put into vectors. The vector width is defined as the number of scalar elements that can be stored in the vector. * * If the cl_khr_fp16 extension is not supported, this function returns 0. */ cl_uint preferred_vector_width_half() const { if (!preferred_vector_width_half_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, sizeof(cl_uint), static_cast(&preferred_vector_width_half_), NULL); VIENNACL_ERR_CHECK(err); preferred_vector_width_half_valid_ = true; } return preferred_vector_width_half_; } /** @brief OpenCL profile string. Returns the profile name supported by the device. * * The profile name returned can be one of the following strings: * FULL_PROFILE - if the device supports the OpenCL specification * EMBEDDED_PROFILE - if the device supports the OpenCL embedded profile. */ std::string profile() const { if (!profile_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PROFILE, sizeof(char) * 32, static_cast(profile_), NULL); VIENNACL_ERR_CHECK(err); profile_valid_ = true; } return profile_; } /** @brief Describes the resolution of device timer. This is measured in nanoseconds. */ size_t profiling_timer_resolution() const { if (!profiling_timer_resolution_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PROFILING_TIMER_RESOLUTION, sizeof(size_t), static_cast(&profiling_timer_resolution_), NULL); VIENNACL_ERR_CHECK(err); profiling_timer_resolution_valid_ = true; } return profiling_timer_resolution_; } /** @brief Describes the command-queue properties supported by the device. 
*
* This is a bit-field that describes one or more of the following values:
*   CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
*   CL_QUEUE_PROFILING_ENABLE
* These properties are described in the table for clCreateCommandQueue in the OpenCL standard.
* The mandated minimum capability is CL_QUEUE_PROFILING_ENABLE.
*/
cl_command_queue_properties queue_properties() const
{
  // Queried from the OpenCL runtime on first use only; later calls return the cached value.
  if (!queue_properties_valid_)
  {
    cl_int err = clGetDeviceInfo(device_, CL_DEVICE_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), static_cast<void *>(&queue_properties_), NULL);
    VIENNACL_ERR_CHECK(err);
    queue_properties_valid_ = true;
  }
  return queue_properties_;
}

/** @brief Describes single precision floating-point capability of the OpenCL device.
*
* This is a bit-field that describes one or more of the following values:
*   CL_FP_DENORM - denorms are supported.
*   CL_FP_INF_NAN - INF and NaNs are supported.
*   CL_FP_ROUND_TO_NEAREST - round to nearest even rounding mode supported.
*   CL_FP_ROUND_TO_ZERO - round to zero rounding mode supported.
*   CL_FP_ROUND_TO_INF - round to +ve and -ve infinity rounding modes supported.
*   CL_FP_FMA - IEEE754-2008 fused multiply-add is supported.
*   CL_FP_SOFT_FLOAT - Basic floating-point operations (such as addition, subtraction, multiplication) are implemented in software.
*
* The mandated minimum floating-point capability is CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN.
*/
cl_device_fp_config single_fp_config() const
{
  if (!single_fp_config_valid_)
  {
    cl_int err = clGetDeviceInfo(device_, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(cl_device_fp_config), static_cast<void *>(&single_fp_config_), NULL);
    VIENNACL_ERR_CHECK(err);
    single_fp_config_valid_ = true;
  }
  return single_fp_config_;
}

/** @brief The OpenCL device type.
*
* Currently supported values are one of or a combination of: CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU, CL_DEVICE_TYPE_ACCELERATOR, or CL_DEVICE_TYPE_DEFAULT.
*/ cl_device_type type() const { if (!type_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_TYPE, sizeof(cl_device_type), static_cast(&type_), NULL); VIENNACL_ERR_CHECK(err); type_valid_ = true; } return type_; } /** @brief Vendor name string. */ std::string vendor() const { if (!vendor_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_VENDOR, sizeof(char) * 256, static_cast(vendor_), NULL); VIENNACL_ERR_CHECK(err); vendor_valid_ = true; } return vendor_; } /** @brief A unique device vendor identifier. An example of a unique device identifier could be the PCIe ID. */ cl_uint vendor_id() const { if (!vendor_id_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_VENDOR_ID, sizeof(cl_uint), static_cast(&vendor_id_), NULL); VIENNACL_ERR_CHECK(err); vendor_id_valid_ = true; } return vendor_id_; } /** @brief Vendor name string. */ std::string version() const { if (!version_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DEVICE_VERSION, sizeof(char) * 256, static_cast(version_), NULL); VIENNACL_ERR_CHECK(err); version_valid_ = true; } return version_; } /** @brief Vendor name string. 
*/ std::string driver_version() const { if (!driver_version_valid_) { cl_int err = clGetDeviceInfo(device_, CL_DRIVER_VERSION, sizeof(char) * 256, static_cast(driver_version_), NULL); VIENNACL_ERR_CHECK(err); driver_version_valid_ = true; } return driver_version_; } ////////////////////////////////////////////////////////////////////////////////////////////////////////// /** @brief ViennaCL convenience function: Returns true if the device supports double precision */ bool double_support() const { std::string ext = extensions(); if (ext.find("cl_khr_fp64") != std::string::npos || ext.find("cl_amd_fp64") != std::string::npos) return true; return false; } /** @brief ViennaCL convenience function: Returns the device extension which enables double precision (usually cl_khr_fp64, but AMD used cl_amd_fp64 in the past) */ std::string double_support_extension() const { std::string ext = extensions(); if (ext.find("cl_amd_fp64") != std::string::npos) //AMD extension return "cl_amd_fp64"; if (ext.find("cl_khr_fp64") != std::string::npos) //Khronos-certified standard extension for double precision return "cl_khr_fp64"; return ""; } /** @brief Returns the OpenCL device id */ cl_device_id id() const { assert(device_ != 0 && bool("Device ID invalid!")); return device_; } /** @brief Returns an info string with a few properties of the device. Use full_info() to get all details. 
* * Returns the following device properties: * name, vendor, type, availability, max compute units, max work group size, global mem size, local mem size, local mem type, host unified memory * * @param indent Number of optional blanks to be added at the start of each line * @param indent_char Character to be used for indenting */ std::string info(vcl_size_t indent = 0, char indent_char = ' ') const { std::string line_indent(indent, indent_char); std::ostringstream oss; oss << line_indent << "Name: " << name() << std::endl; oss << line_indent << "Vendor: " << vendor() << std::endl; oss << line_indent << "Type: " << device_type_to_string(type()) << std::endl; oss << line_indent << "Available: " << available() << std::endl; oss << line_indent << "Max Compute Units: " << max_compute_units() << std::endl; oss << line_indent << "Max Work Group Size: " << max_work_group_size() << std::endl; oss << line_indent << "Global Mem Size: " << global_mem_size() << std::endl; oss << line_indent << "Local Mem Size: " << local_mem_size() << std::endl; oss << line_indent << "Local Mem Type: " << local_mem_type() << std::endl; oss << line_indent << "Host Unified Memory: " << host_unified_memory() << std::endl; return oss.str(); } /** @brief Returns an info string with all device properties defined in the OpenCL 1.1 standard, listed in alphabetical order. Use info() for a short overview. 
* * @param indent Number of optional blanks to be added at the start of each line * @param indent_char Character to be used for indenting */ std::string full_info(vcl_size_t indent = 0, char indent_char = ' ') const { std::string line_indent(indent, indent_char); std::ostringstream oss; oss << line_indent << "Address Bits: " << address_bits() << std::endl; oss << line_indent << "Available: " << available() << std::endl; oss << line_indent << "Compiler Available: " << compiler_available() << std::endl; #ifdef CL_DEVICE_DOUBLE_FP_CONFIG oss << line_indent << "Double FP Config: " << fp_config_to_string(double_fp_config()) << std::endl; #endif oss << line_indent << "Endian Little: " << endian_little() << std::endl; oss << line_indent << "Error Correction Support: " << error_correction_support() << std::endl; oss << line_indent << "Execution Capabilities: " << exec_capabilities_to_string(execution_capabilities()) << std::endl; oss << line_indent << "Extensions: " << extensions() << std::endl; oss << line_indent << "Global Mem Cache Size: " << global_mem_cache_size() << " Bytes" << std::endl; oss << line_indent << "Global Mem Cache Type: " << mem_cache_type_to_string(global_mem_cache_type()) << std::endl; oss << line_indent << "Global Mem Cacheline Size: " << global_mem_cacheline_size() << " Bytes" << std::endl; oss << line_indent << "Global Mem Size: " << global_mem_size() << " Bytes" << std::endl; #ifdef CL_DEVICE_HALF_FP_CONFIG oss << line_indent << "Half PF Config: " << fp_config_to_string(half_fp_config()) << std::endl; #endif oss << line_indent << "Host Unified Memory: " << host_unified_memory() << std::endl; oss << line_indent << "Image Support: " << image_support() << std::endl; oss << line_indent << "Image2D Max Height: " << image2d_max_height() << std::endl; oss << line_indent << "Image2D Max Width: " << image2d_max_width() << std::endl; oss << line_indent << "Image3D Max Depth: " << image3d_max_depth() << std::endl; oss << line_indent << "Image3D Max Height: " 
<< image3d_max_height() << std::endl; oss << line_indent << "Image3D Max Width: " << image3d_max_width() << std::endl; oss << line_indent << "Local Mem Size: " << local_mem_size() << " Bytes" << std::endl; oss << line_indent << "Local Mem Type: " << local_mem_type_to_string(local_mem_type()) << std::endl; oss << line_indent << "Max Clock Frequency: " << max_clock_frequency() << " MHz" << std::endl; oss << line_indent << "Max Compute Units: " << max_compute_units() << std::endl; oss << line_indent << "Max Constant Args: " << max_constant_args() << std::endl; oss << line_indent << "Max Constant Buffer Size: " << max_constant_buffer_size() << " Bytes" << std::endl; oss << line_indent << "Max Mem Alloc Size: " << max_mem_alloc_size() << " Bytes" << std::endl; oss << line_indent << "Max Parameter Size: " << max_parameter_size() << " Bytes" << std::endl; oss << line_indent << "Max Read Image Args: " << max_read_image_args() << std::endl; oss << line_indent << "Max Samplers: " << max_samplers() << std::endl; oss << line_indent << "Max Work Group Size: " << max_work_group_size() << std::endl; oss << line_indent << "Max Work Item Dimensions: " << max_work_item_dimensions() << std::endl; oss << line_indent << "Max Work Item Sizes: " << convert_to_string(max_work_item_sizes()) << std::endl; oss << line_indent << "Max Write Image Args: " << max_write_image_args() << std::endl; oss << line_indent << "Mem Base Addr Align: " << mem_base_addr_align() << std::endl; oss << line_indent << "Min Data Type Align Size: " << min_data_type_align_size() << " Bytes" << std::endl; oss << line_indent << "Name: " << name() << std::endl; oss << line_indent << "Native Vector Width char: " << native_vector_width_char() << std::endl; oss << line_indent << "Native Vector Width short: " << native_vector_width_short() << std::endl; oss << line_indent << "Native Vector Width int: " << native_vector_width_int() << std::endl; oss << line_indent << "Native Vector Width long: " << 
native_vector_width_long() << std::endl; oss << line_indent << "Native Vector Width float: " << native_vector_width_float() << std::endl; oss << line_indent << "Native Vector Width double: " << native_vector_width_double() << std::endl; oss << line_indent << "Native Vector Width half: " << native_vector_width_half() << std::endl; oss << line_indent << "OpenCL C Version: " << opencl_c_version() << std::endl; oss << line_indent << "Platform: " << platform() << std::endl; oss << line_indent << "Preferred Vector Width char: " << preferred_vector_width_char() << std::endl; oss << line_indent << "Preferred Vector Width short: " << preferred_vector_width_short() << std::endl; oss << line_indent << "Preferred Vector Width int: " << preferred_vector_width_int() << std::endl; oss << line_indent << "Preferred Vector Width long: " << preferred_vector_width_long() << std::endl; oss << line_indent << "Preferred Vector Width float: " << preferred_vector_width_float() << std::endl; oss << line_indent << "Preferred Vector Width double: " << preferred_vector_width_double() << std::endl; oss << line_indent << "Preferred Vector Width half: " << preferred_vector_width_half() << std::endl; oss << line_indent << "Profile: " << profile() << std::endl; oss << line_indent << "Profiling Timer Resolution: " << profiling_timer_resolution() << " ns" << std::endl; oss << line_indent << "Queue Properties: " << queue_properties_to_string(queue_properties()) << std::endl; oss << line_indent << "Single FP Config: " << fp_config_to_string(single_fp_config()) << std::endl; oss << line_indent << "Type: " << device_type_to_string(type()) << std::endl; oss << line_indent << "Vendor: " << vendor() << std::endl; oss << line_indent << "Vendor ID: " << vendor_id() << std::endl; oss << line_indent << "Version: " << version() << std::endl; oss << line_indent << "Driver Version: " << driver_version() << std::endl; return oss.str(); } bool operator==(device const & other) const { return device_ == other.device_; 
}

/** @brief Compares the wrapped device id against a raw OpenCL device id. */
bool operator==(cl_device_id other) const
{
  return device_ == other;
}

private:

/** @brief Helper function converting a floating point configuration to a string
*
* Emits the name of every CL_FP_* flag set in 'conf', each followed by a blank. Returns an empty string if none of the tested flags is set.
*/
std::string fp_config_to_string(cl_device_fp_config conf) const
{
  std::ostringstream oss;
  if (conf & CL_FP_DENORM)
    oss << "CL_FP_DENORM ";
  if (conf & CL_FP_INF_NAN)
    oss << "CL_FP_INF_NAN ";
  if (conf & CL_FP_ROUND_TO_NEAREST)
    oss << "CL_FP_ROUND_TO_NEAREST ";
  if (conf & CL_FP_ROUND_TO_ZERO)
    oss << "CL_FP_ROUND_TO_ZERO ";
  if (conf & CL_FP_ROUND_TO_INF)
    oss << "CL_FP_ROUND_TO_INF ";
  if (conf & CL_FP_FMA)
    oss << "CL_FP_FMA ";
  if (conf & CL_FP_SOFT_FLOAT)
    oss << "CL_FP_SOFT_FLOAT ";
  return oss.str();
}

/** @brief Helper function converting an execution capability bit-field to a string (one name per set flag, each followed by a blank). */
std::string exec_capabilities_to_string(cl_device_exec_capabilities cap) const
{
  std::ostringstream oss;
  if (cap & CL_EXEC_KERNEL)
    oss << "CL_EXEC_KERNEL ";
  if (cap & CL_EXEC_NATIVE_KERNEL)
    oss << "CL_EXEC_NATIVE_KERNEL ";
  return oss.str();
}

/** @brief Helper function converting a global memory cache type to a string. Values other than the three tested ones yield an empty string. */
std::string mem_cache_type_to_string(cl_device_mem_cache_type cachetype) const
{
  std::ostringstream oss;
  // Compared with ==, not &: exactly one of the three names is emitted.
  if (cachetype == CL_NONE)
    oss << "CL_NONE ";
  else if (cachetype == CL_READ_ONLY_CACHE)
    oss << "CL_READ_ONLY_CACHE ";
  else if (cachetype == CL_READ_WRITE_CACHE)
    oss << "CL_READ_WRITE_CACHE ";
  return oss.str();
}

/** @brief Helper function converting a local memory type to a string. */
std::string local_mem_type_to_string(cl_device_local_mem_type loc_mem_type) const
{
  std::ostringstream oss;
  // NOTE(review): tested with bitwise & (unlike mem_cache_type_to_string above), so both names can be emitted - confirm this is intended.
  if (loc_mem_type & CL_LOCAL)
    oss << "CL_LOCAL ";
  if (loc_mem_type & CL_GLOBAL)
    oss << "CL_GLOBAL ";
  return oss.str();
}

/* NOTE(review): the following span is truncated in this copy of the file. The text between the loop index 'i' and the
   '#else' below (remainder of convert_to_string(), the rest of the class, and the beginning of the next header up to
   its first include) is missing and is left untouched here - restore it from the upstream sources. */
std::string convert_to_string(std::vector const & vec) const { std::ostringstream oss; for (vcl_size_t i=0; i #else #include #endif #include #include #include "viennacl/forwards.h"

namespace viennacl
{
namespace ocl
{

// Vendor identifiers that get_device_architecture() compares its vendor_id argument against.
static const cl_uint intel_id = 32902;
static const cl_uint nvidia_id = 4318;
static const cl_uint amd_id = 4098;
static const cl_uint unknown_id = 0;

//Architecture Family
// Result type of get_device_architecture(); UNKNOWN is returned when no vendor/name rule matches.
enum device_architecture_family{
  //NVidia
  Tesla,
  Fermi,
  Kepler,

  //AMD
  Evergreen,
  NorthernIslands,
  SouthernIslands,

  UNKNOWN
};

static
device_architecture_family get_device_architecture(cl_uint vendor_id, std::string const & name){ /*-NVidia-*/ if(vendor_id==nvidia_id){ //GeForce vcl_size_t found=0; if((found= name.find("GeForce",0)) != std::string::npos){ if((found = name.find_first_of("123456789", found)) != std::string::npos){ switch (name[found]) { case '2' : return Tesla; case '3' : return Tesla; case '4' : return Fermi; case '5' : return Fermi; case '6' : return Kepler; case '7' : return Kepler; default: return UNKNOWN; } } else return UNKNOWN; } //Tesla else if((found = name.find("Tesla",0)) != std::string::npos){ if((found = name.find("CMK", found)) != std::string::npos){ switch(name[found]){ case 'C' : return Fermi; case 'M' : return Fermi; case 'K' : return Kepler; default : return UNKNOWN; } } else return UNKNOWN; } else return UNKNOWN; } /*-AMD-*/ else if(vendor_id==amd_id){ #define VIENNACL_DEVICE_MAP(device,arch)if(name.find(device,0)!=std::string::npos) return arch; //Evergreen VIENNACL_DEVICE_MAP("Cedar",Evergreen); VIENNACL_DEVICE_MAP("Redwood",Evergreen); VIENNACL_DEVICE_MAP("Juniper",Evergreen); VIENNACL_DEVICE_MAP("Cypress",Evergreen); VIENNACL_DEVICE_MAP("Hemlock",Evergreen); //NorthernIslands VIENNACL_DEVICE_MAP("Caicos",NorthernIslands); VIENNACL_DEVICE_MAP("Turks",NorthernIslands); VIENNACL_DEVICE_MAP("Barts",NorthernIslands); VIENNACL_DEVICE_MAP("Cayman",NorthernIslands); VIENNACL_DEVICE_MAP("Antilles",NorthernIslands); //SouthernIslands VIENNACL_DEVICE_MAP("Cape",SouthernIslands); VIENNACL_DEVICE_MAP("Bonaire",SouthernIslands); VIENNACL_DEVICE_MAP("Pitcaim",SouthernIslands); VIENNACL_DEVICE_MAP("Tahiti",SouthernIslands); VIENNACL_DEVICE_MAP("Malta",SouthernIslands); #undef VIENNACL_DEVICE_MAP return UNKNOWN; } /*-Other-*/ else{ return UNKNOWN; } } } } //namespace viennacl #endif /*@}*/ ViennaCL-1.5.1-src/viennacl/ocl/backend.hpp000644 001750 001750 00000032325 12267307531 020447 0ustar00rupprupp000000 000000 #ifndef VIENNACL_OCL_BACKEND_HPP_ #define 
VIENNACL_OCL_BACKEND_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/ocl/backend.hpp @brief Implementation of the OpenCL backend, in which all contexts are stored. */ #include #include "viennacl/ocl/context.hpp" #include "viennacl/ocl/enqueue.hpp" namespace viennacl { namespace ocl { /** @brief A backend that provides contexts for ViennaCL objects (vector, matrix, etc.) */ template //never use parameter other than default (introduced for linkage issues only) class backend { public: /** @brief Switches the current context to the context identified by i * * Only the stored ID (current_context_id_) is changed here. * * @param i ID of the new active context */ static void switch_context(long i) { current_context_id_ = i; } /** @brief Returns the context with the given ID. If initialized_[id] is not yet set, init() is called on the context first. * * @param id ID of the requested context */ static viennacl::ocl::context & context(long id) { if (!initialized_[id]) { //std::cout << "Initializing context no. " << current_context_id_ << std::endl; contexts_[id].init(); //create one queue per device: std::vector devices = contexts_[id].devices(); for (vcl_size_t j = 0; j::context(current_context_id_); } /** @brief Returns the current queue for the active device in the active context */ static viennacl::ocl::command_queue & get_queue() { return current_context().get_queue(); } /** @brief Sets a number of devices for the context. 
* * If initialized_[i] is already set, only a warning is written to std::cerr. * * @param i ID of the context to be set up * @param devices A vector of OpenCL device-IDs that should be added to the context */ static void setup_context(long i, std::vector const & devices) { if (initialized_[i]) std::cerr << "ViennaCL: Warning in init_context(): Providing a list of devices has no effect, because context for ViennaCL is already created!" << std::endl; else { //set devices for context: for (vcl_size_t j = 0; j const & devices, std::map< cl_device_id, std::vector< cl_command_queue > > const & queues) { assert(devices.size() == queues.size() && bool("ViennaCL expects one queue per device!")); if (initialized_[i]) std::cerr << "ViennaCL: Warning in init_context(): Providing a list of devices has no effect, because context for ViennaCL is already created!" << std::endl; else { //set devices for context: for (vcl_size_t j = 0; j >::const_iterator queue_iterator; for (queue_iterator qit = queues.begin(); qit != queues.end(); ++qit) { std::vector const & queues_for_device = qit->second; for (vcl_size_t j=0; jfirst, queues_for_device[j]); } initialized_[i] = true; } } /** @brief Initializes ViennaCL with an already existing context * * The number of devices must equal the number of queues (checked by assert). * * @param i ID of the context to be set up * @param c The OpenCL handle of the existing context * @param devices A vector of OpenCL device-IDs that should be added to the context * @param queue One queue per device */ static void setup_context(long i, cl_context c, std::vector const & devices, std::vector const & queue) { assert(devices.size() == queue.size() && bool("ViennaCL expects one queue per device!")); //wrap queue vector into map std::map< cl_device_id, std::vector > queues_map; for (vcl_size_t j = 0; j initialized_; static std::map contexts_; }; template long backend::current_context_id_ = 0; template std::map backend::initialized_; template std::map backend::contexts_; ////////////////////// current context ////////////////// /** @brief Convenience function for returning the current context */ inline 
viennacl::ocl::context & current_context() { return viennacl::ocl::backend<>::current_context(); } /** @brief Convenience function for switching the current context * * @param i ID of the context to become the current one */ inline void switch_context(long i) { viennacl::ocl::backend<>::switch_context(i); } /** @brief Convenience function for returning the context with ID i (forwards to backend<>::context(i)), which need not be the current context */ inline viennacl::ocl::context & get_context(long i) { return viennacl::ocl::backend<>::context(i); } /** @brief Convenience function for setting devices for a context */ inline void setup_context(long i, std::vector const & devices) { viennacl::ocl::backend<>::setup_context(i, devices); } /** @brief Convenience function for setting a single device for a context. The device ID is wrapped in a one-element vector and forwarded to backend<>::setup_context(). */ inline void setup_context(long i, viennacl::ocl::device const & device) { std::vector device_id_array(1); device_id_array[0] = device.id(); viennacl::ocl::backend<>::setup_context(i, device_id_array); } /** @brief Convenience function for setting up a context in ViennaCL from an existing OpenCL context. The queues are passed as a map from device ID to a vector of queues. */ inline void setup_context(long i, cl_context c, std::vector const & devices, std::map< cl_device_id, std::vector > const & queues) { viennacl::ocl::backend<>::setup_context(i, c, devices, queues); } /** @brief Convenience function for setting up a context in ViennaCL from an existing OpenCL context. The queues are passed as a plain vector. */ inline void setup_context(long i, cl_context c, std::vector const & devices, std::vector const & queues) { viennacl::ocl::backend<>::setup_context(i, c, devices, queues); } /** @brief Convenience function for setting up a context in ViennaCL from an existing OpenCL context with a single device and a single queue. Both are wrapped in one-element vectors and forwarded to backend<>::setup_context(). */ inline void setup_context(long i, cl_context c, cl_device_id d, cl_command_queue q) { std::vector devices(1); std::vector queues(1); devices[0] = d; queues[0] = q; viennacl::ocl::backend<>::setup_context(i, c, devices, queues); } /** @brief Convenience function for setting the default device type for a context */ inline void set_context_device_type(long i, cl_device_type dev_type) { 
viennacl::ocl::backend<>::set_context_device_type(i, dev_type); } /** @brief Convenience function for setting the default device type for a context to GPUs (CL_DEVICE_TYPE_GPU) */ inline void set_context_device_type(long i, viennacl::ocl::gpu_tag) { set_context_device_type(i, CL_DEVICE_TYPE_GPU); } /** @brief Convenience function for setting the default device type for a context to CPUs (CL_DEVICE_TYPE_CPU) */ inline void set_context_device_type(long i, viennacl::ocl::cpu_tag) { set_context_device_type(i, CL_DEVICE_TYPE_CPU); } /** @brief Convenience function for setting the default device type for a context to the default OpenCL device type (CL_DEVICE_TYPE_DEFAULT) */ inline void set_context_device_type(long i, viennacl::ocl::default_tag) { set_context_device_type(i, CL_DEVICE_TYPE_DEFAULT); } /** @brief Convenience function for setting the default device type for a context to accelerators (CL_DEVICE_TYPE_ACCELERATOR) */ inline void set_context_device_type(long i, viennacl::ocl::accelerator_tag) { set_context_device_type(i, CL_DEVICE_TYPE_ACCELERATOR); } /** @brief Convenience function for setting the number of default devices per context */ inline void set_context_device_num(long i, vcl_size_t num) { viennacl::ocl::backend<>::set_context_device_num(i, num); } /** @brief Convenience function for setting the platform index * * @param i Context ID * @param pf_index The platform index as returned by clGetPlatformIDs(). This is not the ID of type cl_platform_id! 
*/ inline void set_context_platform_index(long i, vcl_size_t pf_index) { viennacl::ocl::backend<>::set_context_platform_index(i, pf_index); } ///////////////////////// get queues /////////////////// /** @brief Convenience function for getting the default queue for the currently active device in the active context */ inline viennacl::ocl::command_queue & get_queue() { return viennacl::ocl::current_context().get_queue(); } /** @brief Convenience function for getting the queue for a particular device (given as viennacl::ocl::device) in the current active context * * @param d The device; only d.id() is used * @param queue_id Passed on to context::get_queue(); defaults to 0 */ inline viennacl::ocl::command_queue & get_queue(viennacl::ocl::device d, unsigned int queue_id = 0) { return viennacl::ocl::current_context().get_queue(d.id(), queue_id); } /** @brief Convenience function for getting the queue for a particular device (given as raw cl_device_id) in the current active context * * @param dev_id The OpenCL device ID * @param queue_id Passed on to context::get_queue(); defaults to 0 */ inline viennacl::ocl::command_queue & get_queue(cl_device_id dev_id, unsigned int queue_id = 0) { return viennacl::ocl::current_context().get_queue(dev_id, queue_id); } /** @brief Convenience function for getting the kernel for a particular program from the current active context * * @param prog_name Name passed to context::get_program() * @param kernel_name Name passed to program::get_kernel() */ inline viennacl::ocl::kernel & get_kernel(std::string const & prog_name, std::string const & kernel_name) { return viennacl::ocl::current_context().get_program(prog_name).get_kernel(kernel_name); } /** @brief Convenience function for switching the active device in the current context */ inline void switch_device(viennacl::ocl::device & d) { viennacl::ocl::current_context().switch_device(d); } /** @brief Convenience function for returning the active device in the current context */ inline viennacl::ocl::device const & current_device() { return viennacl::ocl::current_context().current_device(); } } //ocl } //viennacl #endif ViennaCL-1.5.1-src/viennacl/ocl/kernel.hpp000644 001750 001750 00000133535 12267307531 020345 0ustar00rupprupp000000 000000 #ifndef VIENNACL_OCL_KERNEL_HPP_ #define VIENNACL_OCL_KERNEL_HPP_ /* 
========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/ocl/kernel.hpp @brief Representation of an OpenCL kernel in ViennaCL. */ #ifdef __APPLE__ #include #else #include #endif #include "viennacl/ocl/forwards.h" #include "viennacl/ocl/handle.hpp" #include "viennacl/ocl/program.hpp" #include "viennacl/ocl/device.hpp" #include "viennacl/ocl/local_mem.hpp" #include "viennacl/ocl/infos.hpp" namespace viennacl { namespace ocl { /** @brief Helper class for packing four cl_uint numbers into a uint4 type for access inside an OpenCL kernel. * * Since the primary use is for dealing with ranges and strides, the four members are termed accordingly. * The struct is passed by value to kernel::arg(), which hands sizeof(packed_cl_uint) bytes to clSetKernelArg(). */ struct packed_cl_uint { /** @brief Starting value of the integer stride. */ cl_uint start; /** @brief Increment between integers. */ cl_uint stride; /** @brief Number of values in the stride. */ cl_uint size; /** @brief Internal length of the buffer. Might be larger than 'size' due to padding. 
*/ cl_uint internal_size; }; /** @brief Represents an OpenCL kernel within ViennaCL */ class kernel { template friend void enqueue(KernelType & k, viennacl::ocl::command_queue const & queue); template friend typename detail::return_type::Result info(viennacl::ocl::kernel & k); template friend typename detail::return_type::Result info(viennacl::ocl::kernel & k, viennacl::ocl::device const & d); public: typedef vcl_size_t size_type; /** @brief Default constructor: default-constructed handle, NULL program and context pointers, empty name. set_work_size_defaults() is not called here. */ kernel() : handle_(), p_program_(NULL), p_context_(NULL), name_() { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Creating kernel object (default CTOR)" << std::endl; #endif } /** @brief Wraps an existing OpenCL kernel handle and calls set_work_size_defaults(). * * Only the addresses of kernel_program and kernel_context are stored, so both objects must outlive this kernel object. * * @param kernel_handle The OpenCL kernel handle * @param kernel_program The program the kernel belongs to * @param kernel_context The context the kernel belongs to * @param name Name of the kernel (used in the debug output) */ kernel(cl_kernel kernel_handle, viennacl::ocl::program const & kernel_program, viennacl::ocl::context const & kernel_context, std::string const & name) : handle_(kernel_handle, kernel_context), p_program_(&kernel_program), p_context_(&kernel_context), name_(name) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Creating kernel object (full CTOR)" << std::endl; #endif set_work_size_defaults(); } /** @brief Copy constructor: copies handle, program and context pointers, name, and all three local and global work sizes */ kernel(kernel const & other) : handle_(other.handle_), p_program_(other.p_program_), p_context_(other.p_context_), name_(other.name_) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Creating kernel object (Copy CTOR)" << std::endl; #endif local_work_size_[0] = other.local_work_size_[0]; local_work_size_[1] = other.local_work_size_[1]; local_work_size_[2] = other.local_work_size_[2]; global_work_size_[0] = other.global_work_size_[0]; global_work_size_[1] = other.global_work_size_[1]; global_work_size_[2] = other.global_work_size_[2]; } /** @brief Assignment operator: copies handle, program and context pointers, name and work sizes from 'other' */ viennacl::ocl::kernel & operator=(const kernel & other) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Assigning kernel object" << std::endl; #endif handle_ = other.handle_; p_program_ = other.p_program_; p_context_ = other.p_context_; name_ = other.name_; 
local_work_size_[0] = other.local_work_size_[0]; local_work_size_[1] = other.local_work_size_[1]; local_work_size_[2] = other.local_work_size_[2]; global_work_size_[0] = other.global_work_size_[0]; global_work_size_[1] = other.global_work_size_[1]; global_work_size_[2] = other.global_work_size_[2]; return *this; } /** @brief Sets a char argument at the provided position */ void arg(unsigned int pos, cl_char val) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Setting char kernel argument " << val << " at pos " << pos << " for kernel " << name_ << std::endl; #endif cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(cl_char), (void*)&val); VIENNACL_ERR_CHECK(err); } /** @brief Sets an unsigned char argument at the provided position */ void arg(unsigned int pos, cl_uchar val) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Setting unsigned char kernel argument " << val << " at pos " << pos << " for kernel " << name_ << std::endl; #endif cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(cl_uchar), (void*)&val); VIENNACL_ERR_CHECK(err); } /** @brief Sets an argument of type short at the provided position */ void arg(unsigned int pos, cl_short val) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Setting short kernel argument " << val << " at pos " << pos << " for kernel " << name_ << std::endl; #endif cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(cl_short), (void*)&val); VIENNACL_ERR_CHECK(err); } /** @brief Sets an argument of type unsigned short at the provided position */ void arg(unsigned int pos, cl_ushort val) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Setting unsigned short kernel argument " << val << " at pos " << pos << " for kernel " << name_ << std::endl; #endif cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(cl_ushort), (void*)&val); VIENNACL_ERR_CHECK(err); } /** 
@brief Sets an unsigned integer argument at the provided position */ void arg(unsigned int pos, cl_uint val) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Setting unsigned int kernel argument " << val << " at pos " << pos << " for kernel " << name_ << std::endl; #endif cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(cl_uint), (void*)&val); VIENNACL_ERR_CHECK(err); } /** @brief Sets four packed unsigned integers as argument at the provided position. The whole struct (sizeof(packed_cl_uint) bytes) is passed as a single argument. */ void arg(unsigned int pos, packed_cl_uint val) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Setting packed_cl_uint kernel argument (" << val.start << ", " << val.stride << ", " << val.size << ", " << val.internal_size << ") at pos " << pos << " for kernel " << name_ << std::endl; #endif cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(packed_cl_uint), (void*)&val); VIENNACL_ERR_CHECK(err); } /** @brief Sets a single precision floating point argument at the provided position */ void arg(unsigned int pos, float val) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Setting floating point kernel argument " << val << " at pos " << pos << " for kernel " << name_ << std::endl; #endif cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(float), (void*)&val); VIENNACL_ERR_CHECK(err); } /** @brief Sets a double precision floating point argument at the provided position */ void arg(unsigned int pos, double val) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Setting double precision kernel argument " << val << " at pos " << pos << " for kernel " << name_ << std::endl; #endif cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(double), (void*)&val); VIENNACL_ERR_CHECK(err); } /** @brief Sets an int argument at the provided position */ void arg(unsigned int pos, cl_int val) { #if defined(VIENNACL_DEBUG_ALL) || 
defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Setting int precision kernel argument " << val << " at pos " << pos << " for kernel " << name_ << std::endl; #endif cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(cl_int), (void*)&val); VIENNACL_ERR_CHECK(err); } /** @brief Sets an unsigned long argument at the provided position */ void arg(unsigned int pos, cl_ulong val) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Setting ulong precision kernel argument " << val << " at pos " << pos << " for kernel " << name_ << std::endl; #endif cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(cl_ulong), (void*)&val); VIENNACL_ERR_CHECK(err); } /** @brief Sets a (signed) long argument at the provided position */ void arg(unsigned int pos, cl_long val) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Setting long precision kernel argument " << val << " at pos " << pos << " for kernel " << name_ << std::endl; #endif cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(cl_long), (void*)&val); VIENNACL_ERR_CHECK(err); } //generic handling: call .handle() member /** @brief Sets an OpenCL memory object at the provided position. * * The cl_mem handle is obtained via val.handle().opencl_handle().get(). An assert checks that the memory object lives in the same context as this kernel. */ template void arg(unsigned int pos, VCL_TYPE const & val) { assert(&val.handle().opencl_handle().context() == &handle_.context() && bool("Kernel and memory object not in the same context!")); cl_mem temp = val.handle().opencl_handle().get(); #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Setting generic kernel argument " << temp << " at pos " << pos << " for kernel " << name_ << std::endl; #endif cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(cl_mem), (void*)&temp); VIENNACL_ERR_CHECK(err); } //forward handles directly: /** @brief Sets an OpenCL object at the provided position. The raw handle h.get() is passed on; no context check is performed here. */ template void arg(unsigned int pos, viennacl::ocl::handle const & h) { CL_TYPE temp = h.get(); #if 
defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Setting handle kernel argument " << temp << " at pos " << pos << " for kernel " << name_ << std::endl; #endif cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(CL_TYPE), (void*)&temp); VIENNACL_ERR_CHECK(err); } //local buffer argument: /** @brief Sets an OpenCL local memory object at the provided position. Only the size in bytes (mem.size()) is handed to clSetKernelArg(); the argument value pointer is 0. */ void arg(unsigned int pos, const local_mem & mem) { cl_uint size = static_cast(mem.size()); #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Setting local memory kernel argument of size " << size << " bytes at pos " << pos << " for kernel " << name_ << std::endl; #endif cl_int err = clSetKernelArg(handle_.get(), pos, size, 0); VIENNACL_ERR_CHECK(err); } /** @brief Convenience function for setting one kernel parameter */ template kernel & operator()(T0 const & t0) { arg(0, t0); return *this; } /** @brief Convenience function for setting two kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1) { arg(0, t0); arg(1, t1); return *this; } /** @brief Convenience function for setting three kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2) { arg(0, t0); arg(1, t1); arg(2, t2); return *this; } /** @brief Convenience function for setting four kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); return *this; } /** @brief Convenience function for setting five kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); return *this; } /** @brief Convenience function for setting six kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5) { arg(0, t0); arg(1, 
t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); return *this; } /** @brief Convenience function for setting seven kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); return *this; } /** @brief Convenience function for setting eight kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); return *this; } /** @brief Convenience function for setting nine kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); return *this; } /** @brief Convenience function for setting ten kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); return *this; } /** @brief Convenience function for setting eleven kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); return *this; } /** @brief Convenience function for setting twelve kernel parameters */ template kernel & operator()(T0 const & t0, T1 
const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); return *this; } /** @brief Convenience function for setting thirteen kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); return *this; } /** @brief Convenience function for setting fourteen kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); return *this; } /** @brief Convenience function for setting fifteen kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); return *this; } /** @brief Convenience function for setting sixteen kernel parameters */ template kernel & operator()(T0 const & t0, T1 
const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); return *this; } /** @brief Convenience function for setting seventeen kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); return *this; } /** @brief Convenience function for setting eighteen kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17); return *this; } /** @brief Convenience function for setting nineteen kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 
const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18 ) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17); arg(18, t18); return *this; } /** @brief Convenience function for setting twenty kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18, T19 const & t19 ) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17); arg(18, t18); arg(19, t19); return *this; } /** @brief Convenience function for setting twentyone kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18, T19 const & t19, T20 const & t20 ) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17); arg(18, t18); arg(19, t19); arg(20, t20); return *this; } /** @brief Convenience function for setting twentytwo kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 
const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21 ) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17); arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); return *this; } /** @brief Convenience function for setting 23 kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21, T22 const & t22 ) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17); arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22); return *this; } /** @brief Convenience function for setting 24 kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21, T22 const & t22, T23 const & t23 ) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); 
arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17); arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22); arg(23, t23); return *this; } /** @brief Convenience function for setting 25 kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21, T22 const & t22, T23 const & t23, T24 const & t24 ) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17); arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22); arg(23, t23); arg(24, t24); return *this; } /** @brief Convenience function for setting 26 kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21, T22 const & t22, T23 const & t23, T24 const & t24, T25 const & t25 ) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17); arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22); arg(23, t23); arg(24, t24); arg(25, t25); return *this; } /** @brief Convenience function for setting 27 kernel parameters */ 
template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21, T22 const & t22, T23 const & t23, T24 const & t24, T25 const & t25, T26 const & t26 ) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17); arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22); arg(23, t23); arg(24, t24); arg(25, t25); arg(26, t26); return *this; } /** @brief Convenience function for setting 28 kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21, T22 const & t22, T23 const & t23, T24 const & t24, T25 const & t25, T26 const & t26, T27 const & t27 ) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17); arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22); arg(23, t23); arg(24, t24); arg(25, t25); arg(26, t26); arg(27, t27); return *this; } /** @brief Convenience function for setting 29 kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const 
& t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21, T22 const & t22, T23 const & t23, T24 const & t24, T25 const & t25, T26 const & t26, T27 const & t27, T28 const & t28 ) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17); arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22); arg(23, t23); arg(24, t24); arg(25, t25); arg(26, t26); arg(27, t27); arg(28, t28); return *this; } /** @brief Convenience function for setting 30 kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21, T22 const & t22, T23 const & t23, T24 const & t24, T25 const & t25, T26 const & t26, T27 const & t27, T28 const & t28, T29 const & t29 ) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17); arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22); arg(23, t23); arg(24, t24); arg(25, t25); arg(26, t26); arg(27, t27); arg(28, t28); arg(29, t29); return *this; } /** @brief Convenience function for setting 31 kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & 
t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21, T22 const & t22, T23 const & t23, T24 const & t24, T25 const & t25, T26 const & t26, T27 const & t27, T28 const & t28, T29 const & t29, T30 const & t30 ) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17); arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22); arg(23, t23); arg(24, t24); arg(25, t25); arg(26, t26); arg(27, t27); arg(28, t28); arg(29, t29); arg(30, t30); return *this; } /** @brief Convenience function for setting 32 kernel parameters */ template kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17, T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21, T22 const & t22, T23 const & t23, T24 const & t24, T25 const & t25, T26 const & t26, T27 const & t27, T28 const & t28, T29 const & t29, T30 const & t30, T31 const & t31 ) { arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17); arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22); arg(23, t23); arg(24, t24); arg(25, t25); arg(26, t26); arg(27, t27); arg(28, t28); arg(29, t29); arg(30, t30); arg(31, t31); return *this; } /** @brief Returns the local work size at the respective dimension * * @param index Dimension index (currently either 0 or 
1) */ size_type local_work_size(int index = 0) const { assert(index < 3 && bool("Work size index out of bounds")); return local_work_size_[index]; } /** @brief Returns the global work size at the respective dimension * * @param index Dimension index (currently either 0 or 1) */ size_type global_work_size(int index = 0) const { assert(index < 3 && bool("Work size index out of bounds")); return global_work_size_[index]; } /** @brief Sets the local work size at the respective dimension * * @param index Dimension index (currently either 0 or 1) * @param s The new local work size */ void local_work_size(int index, size_type s) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Setting local work size to " << s << " at index " << index << " for kernel " << name_ << std::endl; #endif assert(index < 3 && bool("Work size index out of bounds")); local_work_size_[index] = s; } /** @brief Sets the global work size at the respective dimension * * @param index Dimension index (currently either 0 or 1) * @param s The new global work size */ void global_work_size(int index, size_type s) { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) std::cout << "ViennaCL: Setting global work size to " << s << " at index " << index << " for kernel " << name_ << std::endl; #endif assert(index < 3 && bool("Work size index out of bounds")); global_work_size_[index] = s; } std::string const & name() const { return name_; } viennacl::ocl::handle const & handle() const { return handle_; } viennacl::ocl::context const & context() const { return *p_context_; } private: inline void set_work_size_defaults(); //see context.hpp for implementation viennacl::ocl::handle handle_; viennacl::ocl::program const * p_program_; viennacl::ocl::context const * p_context_; std::string name_; size_type local_work_size_[3]; size_type global_work_size_[3]; }; /** @brief Queries information about a kernel * * @param k Corresponding kernel */ template typename 
detail::return_type::Result info(viennacl::ocl::kernel & k) { typedef typename detail::return_type::Result res_t; return detail::get_info_impl()(k.handle_.get(),param); } /** @brief Queries information about the execution of a kernel on a particular device * * @param k Corresponding kernel * @param d Corresponding device */ template typename detail::return_type::Result info(viennacl::ocl::kernel & k, viennacl::ocl::device const & d) { typedef typename detail::return_type::Result res_t; return detail::get_info_impl()(k.handle_.get(),d.id(),param); } } //namespace ocl } //namespace viennacl #endif ViennaCL-1.5.1-src/viennacl/ocl/error.hpp000644 001750 001750 00000064707 12267307531 020222 0ustar00rupprupp000000 000000 #ifndef VIENNACL_OCL_ERROR_HPP_ #define VIENNACL_OCL_ERROR_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/ocl/error.hpp @brief Error handling for the OpenCL layer of ViennaCL */ //error levels: //#define VIENNACL_DEBUG_ALL //print all of the following //#define VIENNACL_DEBUG_KERNEL //debug any modifications on viennacl::ocl::kernel objects //#define VIENNACL_DEBUG_COPY //print infos related to setting up/modifying memory objects //#define VIENNACL_DEBUG_OPENCL //display debug info for the OpenCL layer (platform/context/queue creation, //#define VIENNACL_DEBUG_DEVICE //Show device info upon allocation //#define VIENNACL_DEBUG_CONTEXT //Debug queries to context //#define VIENNACL_DEBUG_BUILD //Show debug info from OpenCL compiler //backwards compatibility: #ifdef VIENNACL_BUILD_INFO #define VIENNACL_DEBUG_ALL #endif #ifdef __APPLE__ #include #else #include #endif #include #include #include #define VIENNACL_BUG_REPORT_STRING \ "\nIf you think that this is a bug in ViennaCL, please report it at viennacl-support@lists.sourceforge.net and supply at least the following information:\n"\ " * Operating System\n"\ " * Which OpenCL implementation (AMD, NVIDIA, etc.)\n"\ " * ViennaCL version\n"\ "Many thanks in advance!";\ namespace viennacl { namespace ocl { //Wrapper for OpenCL exceptions: /** @brief Exception thrown in the case that a requested compute device was not found. * * This exception usually shows up if a user requests a GPU for computation, but the OpenCL SDK does not support the GPU. */ class device_not_found : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_DEVICE_NOT_FOUND \n ViennaCL could not find a suitable device. 
Please check whether an OpenCL implementation is properly installed and a suitable device available." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the selected compute device is not available (maybe locked by another process). */ class device_not_available : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_DEVICE_NOT_AVAILABLE \n ViennaCL could not use the compute device because it is not available." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the OpenCL just-in-time compiler is not available. */ class compiler_not_available : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_COMPILER_NOT_AVAILABLE \n Your OpenCL framework does not provide an OpenCL compiler. Unfortunately, ViennaCL cannot be used without such a compiler." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if a memory object cannot be allocated. Usually the requested memory buffer is simply too large. */ class mem_object_allocation_failure : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_MEM_OBJECT_ALLOCATION_FAILURE \n ViennaCL could not allocate memory on the device. Most likely the device simply ran out of memory." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the compute device is out of resources (either global memory, registers, etc.) for the requested operation. */ class out_of_resources : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_OUT_OF_RESOURCES \n ViennaCL tried to launch a compute kernel, but the device does not provide enough resources. Try changing the global and local work item sizes." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the host cannot provide enough memory for the datastructures in the OpenCL backend (temporary arrays, etc.) to perform the requested operation. 
*/ class out_of_host_memory : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_OUT_OF_HOST_MEMORY \n The host ran out of memory (usually CPU RAM). Please try again on smaller problems." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the OpenCL context does not have CL_QUEUE_PROFILING_ENABLE set, if the execution is not complete, or the event object is a user event object. */ class profiling_info_not_available : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_PROFILING_INFO_NOT_AVAILABLE." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the source buffer overlaps the destination buffer when copying from device memory to device memory. */ class mem_copy_overlap : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_MEM_COPY_OVERLAP." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if there is a mismatch in image formats for the operands. */ class image_format_mismatch : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_IMAGE_FORMAT_MISMATCH." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the requested image format is not supported. */ class image_format_not_supported : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_IMAGE_FORMAT_NOT_SUPPORTED." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the OpenCL program cannot be built, usually due to a syntax error in the OpenCL code. */ class build_program_failure : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_BUILD_PROGRAM_FAILURE \n The OpenCL compiler encountered an error during the compilation of ViennaCL sources. This is most likely a bug in ViennaCL." 
VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the mapping of device memory to the host memory space failed. */ class map_failure : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_MAP_FAILURE." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown is an invalid value is provided to an OpenCL function. */ class invalid_value : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_VALUE." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if an invalid device type is specified. */ class invalid_device_type : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_DEVICE_TYPE." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if an invalid OpenCL platform is provided to an OpenCL function. */ class invalid_platform : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_PLATFORM." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if an invalid OpenCL device is provided to an OpenCL function. */ class invalid_device : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_DEVICE." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if an invalid OpenCL context is provided to an OpenCL function. */ class invalid_context : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_CONTEXT." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if invalid OpenCL command queue properties are provided when creating a command queue. */ class invalid_queue_properties : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_QUEUE_PROPERTIES." 
VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if an invalid OpenCL command queue is provided to an OpenCL function. */ class invalid_command_queue : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_COMMAND_QUEUE." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the provided pointer to host memory is invalid. */ class invalid_host_ptr : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_HOST_PTR." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if an invalid OpenCL memory object (of type cl_mem) is passed to an OpenCL funciton. */ class invalid_mem_object : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_MEM_OBJECT." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if an invalid image format descriptor is provided. */ class invalid_image_format_descriptor : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_IMAGE_FORMAT_DESCRIPTOR." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the image size provided is invalid (e.g. zero). */ class invalid_image_size : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_IMAGE_SIZE." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if an invalid sampler is provided for an image. */ class invalid_sampler : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_SAMPLER." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the OpenCL binary (generated from the jit-compiler or loaded from some other location) won't work on the device (e.g. due to a lack of double precision support). 
*/ class invalid_binary : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_BINARY." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if invalid build options are passed to the OpenCL just-in-time compiler. */ class invalid_build_options : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_BUILD_OPTIONS." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if an OpenCL program object handle is invalid (e.g. not initialized). */ class invalid_program : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_PROGRAM." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if there is no built program exectuable available for the device. */ class invalid_program_executable : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_PROGRAM_EXECUTABLE." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the provided kernel name is invalid (e.g. not part of the program provided). */ class invalid_kernel_name : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_KERNEL_NAME \n The supplied kernel name is invalid. If you have written your own OpenCL kernel, please check that the correct kernel name is used in the initalization of the kernel object." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the kernel definition (number of arguments, argument types, etc.) is not the same for all devices for which the program has been built. */ class invalid_kernel_definition : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_KERNEL_DEFINITION." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the provided kernel object (of type cl_kernel) is invalid (e.g. 
not initialized, from different context, or corrupted). */ class invalid_kernel : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_KERNEL \n The supplied kernel argument is invalid." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the kernel argument index is invalid, e.g. an arg index larger than the number of kernel arguments was provided. */ class invalid_arg_index : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_ARG_INDEX." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the kernel argument provided has an invalid value. */ class invalid_arg_value : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_ARG_VALUE." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the arguments to an OpenCL kernel have an invalid size e.g. not sizeof(cl_mem)). */ class invalid_arg_size : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_ARG_SIZE." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the kernel arguments are invalid and/or do not fit the kernel parameter list. */ class invalid_kernel_args : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_KERNEL_ARGS \n The supplied kernel arguments do not fit the kernel parameter list. If you have written your own OpenCL kernel, please check that the correct kernel arguments are set in the appropriate order." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the work dimension is invalid (usually this means that the work dimension was set to be larger than three. 
*/ class invalid_work_dimension : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_WORK_DIMENSION" VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the number of work groups is invalid (usually this means that more than 256/512/768/1024 work groups have been specified, but the device(s) cannot support this. */ class invalid_work_group_size : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_WORK_GROUP_SIZE \n The supplied work group size is invalid. If you have set this value manually, please reconsider your choice." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the number of work items per work group invalid (usually this means that more than 256/512/768/1024 work items have been specified, but the device(s) cannot support this. */ class invalid_work_item_size : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_WORK_ITEM_SIZE \n The work item size is invalid. If you have set this value manually, please reconsider your choice." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the provided offset for get_global_id() in OpenCL kernels is invalid. */ class invalid_global_offset : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_GLOBAL_OFFSET." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the provided event wait list is invalid. */ class invalid_event_wait_list : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_EVENT_WAIT_LIST." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the provided event object (of type cl_event) is invalid. */ class invalid_event : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_EVENT." 
VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if interoperability of OpenCL with other frameworks collide. */ class invalid_operation : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_OPERATION." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the provided OpenGL (not OpenCL) object is invalid. */ class invalid_gl_object : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_GL_OBJECT." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the provided buffer size is invalid (e.g. zero) */ class invalid_buffer_size : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_BUFFER_SIZE." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the provided miplevel is greater than zero, but the OpenGL implementation does not support creating from non-zero mipmap levels. */ class invalid_mip_level : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_MIP_LEVEL." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the total number of work items is invalid (for example, not divisible by the number of work items per work group). */ class invalid_global_work_size : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_GLOBAL_WORK_SIZE." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if an invalid property is provided to a function (vague value). */ class invalid_property : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: CL_INVALID_PROPERTY." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the returned error cannot be resolved to some defined error constant. Might result from invalid sources, invalid memory operations, etc. 
*/ class unknown_error : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: ViennaCL encountered an unknown OpenCL error. In some cases, this might be due to an invalid global work size, but it can also be due to several compilation errors." VIENNACL_BUG_REPORT_STRING; } }; /** @brief Exception thrown if the user wants to use double precision arithmetics, but the device does not support double precision. */ class double_precision_not_provided_error : public std::exception { virtual const char* what() const throw() { return "ViennaCL: FATAL ERROR: You requested to create a ViennaCL type using double precision. However, double precision is not supported by your device." VIENNACL_BUG_REPORT_STRING; } }; /** @brief An error reporting class. Template argument is used to avoid problems with external linkage. * * Do not use this class directly, use the macro CL_ERROR_CHECK instead. * @tparam T Useless. Helps to avoid troubles with external linkage of namespace functions. 
*/ template struct error_checker { /** @brief Trows exceptions that reflect OpenCL error codes */ static void raise_exception(cl_int err) { switch (err) { case CL_DEVICE_NOT_FOUND: throw device_not_found(); case CL_DEVICE_NOT_AVAILABLE: throw device_not_available(); case CL_COMPILER_NOT_AVAILABLE: throw compiler_not_available(); case CL_MEM_OBJECT_ALLOCATION_FAILURE: throw mem_object_allocation_failure(); case CL_OUT_OF_RESOURCES: throw out_of_resources(); case CL_OUT_OF_HOST_MEMORY: throw out_of_host_memory(); case CL_PROFILING_INFO_NOT_AVAILABLE: throw profiling_info_not_available(); case CL_MEM_COPY_OVERLAP: throw mem_copy_overlap(); case CL_IMAGE_FORMAT_MISMATCH: throw image_format_mismatch(); case CL_IMAGE_FORMAT_NOT_SUPPORTED: throw image_format_not_supported(); case CL_BUILD_PROGRAM_FAILURE: throw build_program_failure(); case CL_MAP_FAILURE: throw map_failure(); case CL_INVALID_VALUE: throw invalid_value(); case CL_INVALID_DEVICE_TYPE: throw invalid_device_type(); case CL_INVALID_PLATFORM: throw invalid_platform(); case CL_INVALID_DEVICE: throw invalid_device(); case CL_INVALID_CONTEXT: throw invalid_context(); case CL_INVALID_QUEUE_PROPERTIES: throw invalid_queue_properties(); case CL_INVALID_COMMAND_QUEUE: throw invalid_command_queue(); case CL_INVALID_HOST_PTR: throw invalid_host_ptr(); case CL_INVALID_MEM_OBJECT: throw invalid_mem_object(); case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: throw invalid_image_format_descriptor(); case CL_INVALID_IMAGE_SIZE: throw invalid_image_size(); case CL_INVALID_SAMPLER: throw invalid_sampler(); case CL_INVALID_BINARY: throw invalid_binary(); case CL_INVALID_BUILD_OPTIONS: throw invalid_build_options(); case CL_INVALID_PROGRAM: throw invalid_program(); case CL_INVALID_PROGRAM_EXECUTABLE: throw invalid_program_executable(); case CL_INVALID_KERNEL_NAME: throw invalid_kernel_name(); case CL_INVALID_KERNEL_DEFINITION: throw invalid_kernel_definition(); case CL_INVALID_KERNEL: throw invalid_kernel(); case CL_INVALID_ARG_INDEX: 
throw invalid_arg_index(); case CL_INVALID_ARG_VALUE: throw invalid_arg_value(); case CL_INVALID_ARG_SIZE: throw invalid_arg_size(); case CL_INVALID_KERNEL_ARGS: throw invalid_kernel_args(); case CL_INVALID_WORK_DIMENSION: throw invalid_work_dimension(); case CL_INVALID_WORK_GROUP_SIZE: throw invalid_work_group_size(); case CL_INVALID_WORK_ITEM_SIZE: throw invalid_work_item_size(); case CL_INVALID_GLOBAL_OFFSET: throw invalid_global_offset(); case CL_INVALID_EVENT_WAIT_LIST: throw invalid_event_wait_list(); case CL_INVALID_EVENT: throw invalid_event(); case CL_INVALID_OPERATION: throw invalid_operation(); case CL_INVALID_GL_OBJECT: throw invalid_gl_object(); case CL_INVALID_BUFFER_SIZE: throw invalid_buffer_size(); case CL_INVALID_MIP_LEVEL: throw invalid_mip_level(); case CL_INVALID_GLOBAL_WORK_SIZE: throw invalid_global_work_size(); #ifdef CL_INVALID_PROPERTY case CL_INVALID_PROPERTY: throw invalid_property(); #endif // return "CL_INVALID_GLOBAL_WORK_SIZE"; default: throw unknown_error(); } } //getErrorString /** @brief Checks whether an OpenCL error has occured. * * Do not use this function directly, use the macro CL_ERROR_CHECK instead. 
*/ static void checkError(cl_int err, #ifdef VIENNACL_DEBUG_ALL const char * file, const char * func, int line) #else const char *, const char *, int) #endif { if (err != CL_SUCCESS) { #ifdef VIENNACL_DEBUG_ALL std::cerr << "ViennaCL: Error " << err << " in function " << func << " ( "<< file << ":" << line << " ) " << std::endl; #endif raise_exception(err); } } //checkError() }; //struct #define VIENNACL_ERR_CHECK(err) viennacl::ocl::error_checker::checkError(err, __FILE__, __FUNCTION__, __LINE__); } //namespace ocl } //namespace viennacl #endif ViennaCL-1.5.1-src/viennacl/ocl/program.hpp000644 001750 001750 00000005576 12267307531 020537 0ustar00rupprupp000000 000000 #ifndef VIENNACL_OCL_PROGRAM_HPP_ #define VIENNACL_OCL_PROGRAM_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/ocl/program.hpp @brief Implements an OpenCL program class for ViennaCL */ #include #include #include "viennacl/ocl/forwards.h" #include "viennacl/ocl/handle.hpp" #include "viennacl/ocl/kernel.hpp" namespace viennacl { namespace ocl { /** @brief Wrapper class for an OpenCL program. * * This class was written when the OpenCL C++ bindings haven't been standardized yet. * Regardless, it takes care about some additional details and is supposed to provide higher convenience by holding the kernels defined in the program. 
*/ class program { typedef std::vector KernelContainer; public: /** @brief Creates a program object that is not bound to any context (context pointer is NULL) */ program() : p_context_(NULL) {} /** @brief Wraps an existing OpenCL program handle; stores the address of the supplied context and an optional name (empty by default) */ program(cl_program program_handle, viennacl::ocl::context const & program_context, std::string const & prog_name = std::string()) : handle_(program_handle, program_context), p_context_(&program_context), name_(prog_name) {} /** @brief Copies handle, context pointer, name and kernel container from another program */ program(program const & other) : handle_(other.handle_), p_context_(other.p_context_), name_(other.name_), kernels_(other.kernels_) {} /** @brief Member-wise assignment of handle, name, context pointer and kernel container */ viennacl::ocl::program & operator=(const program & other) { handle_ = other.handle_; name_ = other.name_; p_context_ = other.p_context_; kernels_ = other.kernels_; return *this; } /** @brief Returns the name supplied at construction */ std::string const & name() const { return name_; } /** @brief Adds a kernel to the program */ inline viennacl::ocl::kernel & add_kernel(cl_kernel kernel_handle, std::string const & kernel_name); //see context.hpp for implementation /** @brief Returns the kernel with the provided name */ inline viennacl::ocl::kernel & get_kernel(std::string const & name); //see context.hpp for implementation /** @brief Returns the wrapped program handle */ const viennacl::ocl::handle & handle() const { return handle_; } private: viennacl::ocl::handle handle_; viennacl::ocl::context const * p_context_; std::string name_; KernelContainer kernels_; }; } //namespace ocl } //namespace viennacl #endif ViennaCL-1.5.1-src/viennacl/ocl/utils.hpp000644 001750 001750 00000006465 12267307531 020226 0ustar00rupprupp000000 000000 #ifndef VIENNACL_OCL_UTILS_HPP_ #define VIENNACL_OCL_UTILS_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/ocl/utils.hpp @brief Provides OpenCL-related utilities. */ #include #include #include "viennacl/ocl/backend.hpp" #include "viennacl/ocl/device.hpp" namespace viennacl { namespace ocl { /** @brief Ensures that double precision types are only allocated if it is supported by the device. If double precision is requested for a device not capable of providing that, a double_precision_not_provided_error is thrown. */ template struct DOUBLE_PRECISION_CHECKER { static void apply(viennacl::ocl::context const &) {} }; /** \cond */ template <> struct DOUBLE_PRECISION_CHECKER { static void apply(viennacl::ocl::context const & ctx) { if (!ctx.current_device().double_support()) throw viennacl::ocl::double_precision_not_provided_error(); } }; /** \endcond */ /** \brief Helper class for converting a type to its string representation. 
*/ template struct type_to_string; /** \cond */ template <> struct type_to_string { static std::string apply() { return "char"; } }; template <> struct type_to_string { static std::string apply() { return "short"; } }; template <> struct type_to_string { static std::string apply() { return "int"; } }; template <> struct type_to_string { static std::string apply() { return "long"; } }; template <> struct type_to_string { static std::string apply() { return "uchar"; } }; template <> struct type_to_string { static std::string apply() { return "ushort"; } }; template <> struct type_to_string { static std::string apply() { return "uint"; } }; template <> struct type_to_string { static std::string apply() { return "ulong"; } }; template <> struct type_to_string { static std::string apply() { return "float"; } }; template <> struct type_to_string { static std::string apply() { return "double"; } }; /** \endcond */ template void append_double_precision_pragma(viennacl::ocl::context const & /*ctx*/, std::string & /*source*/) {} template <> inline void append_double_precision_pragma(viennacl::ocl::context const & ctx, std::string & source) { source.append("#pragma OPENCL EXTENSION " + ctx.current_device().double_support_extension() + " : enable\n\n"); } } //ocl } //viennacl #endif ViennaCL-1.5.1-src/viennacl/tools/000755 001750 001750 00000000000 12267307531 016725 5ustar00rupprupp000000 000000 ViennaCL-1.5.1-src/viennacl/tools/timer.hpp000644 001750 001750 00000005227 12267307531 020564 0ustar00rupprupp000000 000000 #ifndef _VIENNACL_TOOLS_TIMER_HPP_ #define _VIENNACL_TOOLS_TIMER_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/tools/timer.hpp @brief A simple, yet (mostly) sufficiently accurate timer for benchmarking and profiling. */ #include #ifdef _WIN32 #define WINDOWS_LEAN_AND_MEAN #include #undef min #undef max namespace viennacl{ namespace tools{ /** @brief Simple timer class based on gettimeofday (POSIX) or QueryPerformanceCounter (Windows). * * Avoids messing with Boost and should be sufficient for benchmarking purposes. */ class timer { public: timer() { QueryPerformanceFrequency(&freq); } void start() { QueryPerformanceCounter((LARGE_INTEGER*) &start_time); } double get() const { LARGE_INTEGER end_time; QueryPerformanceCounter((LARGE_INTEGER*) &end_time); return (static_cast(end_time.QuadPart) - static_cast(start_time.QuadPart)) / static_cast(freq.QuadPart); } private: LARGE_INTEGER freq; LARGE_INTEGER start_time; }; } } #else #include namespace viennacl{ namespace tools{ /** @brief Simple timer class based on gettimeofday (POSIX) or QueryPerformanceCounter (Windows). * * Avoids messing with Boost and should be sufficient for benchmarking purposes. 
*/ class timer { public: timer() : ts(0) {} void start() { struct timeval tval; gettimeofday(&tval, NULL); ts = static_cast(tval.tv_sec * 1000000 + tval.tv_usec); } double get() const { struct timeval tval; gettimeofday(&tval, NULL); double end_time = tval.tv_sec * 1000000 + tval.tv_usec; return static_cast(end_time-ts) / 1000000.0; } private: double ts; }; } } #endif #endif ViennaCL-1.5.1-src/viennacl/tools/shared_ptr.hpp000644 001750 001750 00000010372 12267307531 021574 0ustar00rupprupp000000 000000 #ifndef VIENNACL_TOOLS_SHARED_PTR_HPP #define VIENNACL_TOOLS_SHARED_PTR_HPP /* ========================================================================= Copyright (c) 2010-2012, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file tools/shared_ptr.hpp @brief Implementation of a shared pointer class (cf. std::shared_ptr, boost::shared_ptr). Will be used until C++11 is widely available. Contributed by Philippe Tillet. */ #include #include namespace viennacl { namespace tools { namespace detail { /** @brief Reference counting class for the shared_ptr implementation */ class count { public: count(unsigned int val) : val_(val){ } void dec(){ --val_; } void inc(){ ++val_; } bool is_null(){ return val_ == 0; } unsigned int val(){ return val_; } private: unsigned int val_; }; /** @brief Interface for the reference counter inside the shared_ptr */ struct aux { detail::count count; aux() :count(1) {} virtual void destroy()=0; virtual ~aux() {} }; /** @brief Implementation helper for the reference counting mechanism inside shared_ptr. 
*/ template struct auximpl: public detail::aux { U* p; Deleter d; auximpl(U* pu, Deleter x) :p(pu), d(x) {} virtual void destroy() { d(p); } }; /** @brief Default deleter class for a pointer. The default is to just call 'delete' on the pointer. Provide your own implementations for 'delete[]' and 'free'. */ template struct default_deleter { void operator()(U* p) const { delete p; } }; } /** @brief A shared pointer class similar to boost::shared_ptr. Reimplemented in order to avoid a Boost-dependency. Will be replaced by std::shared_ptr as soon as C++11 is widely available. */ template class shared_ptr { template friend class shared_ptr; detail::aux* pa; T* pt; public: shared_ptr() :pa(NULL), pt(NULL) {} template shared_ptr(U* pu, Deleter d) : pa(new detail::auximpl(pu, d)), pt(pu) {} template explicit shared_ptr(U* pu) : pa(new detail::auximpl >(pu, detail::default_deleter())), pt(pu) {} shared_ptr(const shared_ptr& s) :pa(s.pa), pt(s.pt) { inc(); } template shared_ptr(const shared_ptr& s) :pa(s.pa), pt(s.pt) { inc(); } ~shared_ptr() { dec(); } void reset(){ shared_ptr().swap(*this); } void reset(T * ptr){ shared_ptr(ptr).swap(*this); } void swap(shared_ptr & other){ std::swap(pt,other.pt); std::swap(pa, other.pa); } shared_ptr& operator=(const shared_ptr& s) { if(this!=&s) { dec(); pa = s.pa; pt = s.pt; inc(); } return *this; } T* get() const { return pt; } T* operator->() const { return pt; } T& operator*() const { return *pt; } void inc() { if(pa) pa->count.inc(); } void dec() { if(pa) { pa->count.dec(); if(pa->count.is_null()) { pa->destroy(); delete pa; pa = NULL; } } } }; } } #endif // VIENNACL_UTILS_SHARED_PTR_HPP ViennaCL-1.5.1-src/viennacl/tools/matrix_size_deducer.hpp000644 001750 001750 00000022257 12267307531 023477 0ustar00rupprupp000000 000000 #ifndef VIENNACL_TOOLS_MATRIX_SIZE_DEDUCER_HPP_ #define VIENNACL_TOOLS_MATRIX_SIZE_DEDUCER_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for 
Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/tools/matrix_size_deducer.hpp @brief Helper implementations that deduce the dimensions of the supplied matrix-valued expressions. */ #include #include #include #include #include #include #include "viennacl/forwards.h" #include "viennacl/tools/adapter.hpp" namespace viennacl { namespace tools { /** @brief Deduces the size of the resulting vector represented by a vector_expression from the operands * * @tparam LHS The left hand side operand * @tparam RHS The right hand side operand * @tparam OP The operation tag */ template struct MATRIX_SIZE_DEDUCER { //Standard case: size1 from lhs, size2 from rhs (fits most cases) static vcl_size_t size1(LHS & lhs, RHS & /*rhs*/) { return lhs.size1(); } static vcl_size_t size2(LHS & /*lhs*/, RHS & rhs) { return rhs.size2(); } }; /** \cond */ //special case: outer vector product: template struct MATRIX_SIZE_DEDUCER, const viennacl::vector_base, viennacl::op_prod> { static vcl_size_t size1(viennacl::vector_base const & lhs, viennacl::vector_base const & /*rhs*/) { return lhs.size(); } static vcl_size_t size2(viennacl::vector_base const & /*lhs*/, viennacl::vector_base const & rhs) { return rhs.size(); } }; //special case: multiplication with a scalar template struct MATRIX_SIZE_DEDUCER, const ScalarType, viennacl::op_mult> { static vcl_size_t size1(viennacl::matrix_expression const & lhs, ScalarType const & /*rhs*/) { return MATRIX_SIZE_DEDUCER::size1(lhs.lhs(), lhs.rhs()); } static vcl_size_t size2(viennacl::matrix_expression const & lhs, ScalarType 
const & /*rhs*/) { return MATRIX_SIZE_DEDUCER::size2(lhs.lhs(), lhs.rhs()); } }; //special case: multiplication with a scalar template struct MATRIX_SIZE_DEDUCER, const ScalarType, viennacl::op_mult> { static vcl_size_t size1(viennacl::matrix_base const & lhs, ScalarType const & /*rhs*/) { return lhs.size1(); } static vcl_size_t size2(viennacl::matrix_base const & lhs, ScalarType const & /*rhs*/) { return lhs.size2(); } }; //special case: division with a scalar template struct MATRIX_SIZE_DEDUCER, const ScalarType, viennacl::op_div> { static vcl_size_t size1(viennacl::matrix_expression const & lhs, ScalarType const & /*rhs*/) { return MATRIX_SIZE_DEDUCER::size1(lhs.lhs(), lhs.rhs()); } static vcl_size_t size2(viennacl::matrix_expression const & lhs, ScalarType const & /*rhs*/) { return MATRIX_SIZE_DEDUCER::size2(lhs.lhs(), lhs.rhs()); } }; //special case: division with a scalar template struct MATRIX_SIZE_DEDUCER, const ScalarType, viennacl::op_div> { static vcl_size_t size1(viennacl::matrix_base const & lhs, ScalarType const & /*rhs*/) { return lhs.size1(); } static vcl_size_t size2(viennacl::matrix_base const & lhs, ScalarType const & /*rhs*/) { return lhs.size2(); } }; //special case: diagonal from vector template struct MATRIX_SIZE_DEDUCER, const int, viennacl::op_vector_diag> { static vcl_size_t size1(viennacl::vector_base const & lhs, const int k) { return lhs.size() + static_cast(std::fabs(double(k))); } static vcl_size_t size2(viennacl::vector_base const & lhs, const int k) { return lhs.size() + static_cast(std::fabs(double(k))); } }; //special case: transposed matrix-vector product: Return the number of rows of the matrix template struct MATRIX_SIZE_DEDUCER { static vcl_size_t size1(const MatrixType & lhs, const MatrixType & /*rhs*/) { return lhs.size2(); } static vcl_size_t size2(const MatrixType & lhs, const MatrixType & /*rhs*/) { return lhs.size1(); } }; // A^T * B template struct MATRIX_SIZE_DEDUCER, const viennacl::matrix_base, 
viennacl::op_mat_mat_prod> { static vcl_size_t size1(viennacl::matrix_expression const & lhs, viennacl::matrix_base const & /*rhs*/) { return lhs.lhs().size2(); } static vcl_size_t size2(viennacl::matrix_expression const & /*lhs*/, viennacl::matrix_base const & rhs) { return rhs.size2(); } }; // A * B^T template struct MATRIX_SIZE_DEDUCER, const viennacl::matrix_expression, viennacl::op_mat_mat_prod> { static vcl_size_t size1(viennacl::matrix_base const & lhs, viennacl::matrix_expression const & /*rhs*/) { return lhs.size1(); } static vcl_size_t size2(viennacl::matrix_base const & /*lhs*/, viennacl::matrix_expression const & rhs) { return rhs.lhs().size1(); } }; // A^T * B^T template struct MATRIX_SIZE_DEDUCER, const viennacl::matrix_expression, viennacl::op_mat_mat_prod> { typedef viennacl::matrix_expression LHSType; typedef viennacl::matrix_expression RHSType; static vcl_size_t size1(LHSType const & lhs, RHSType const & /*rhs*/) { return lhs.lhs().size2(); } static vcl_size_t size2(LHSType const & /*lhs*/, RHSType const & rhs) { return rhs.lhs().size1(); } }; /** \endcond */ } } #endif ViennaCL-1.5.1-src/viennacl/tools/tools.hpp000644 001750 001750 00000022011 12267307531 020572 0ustar00rupprupp000000 000000 #ifndef VIENNACL_TOOLS_TOOLS_HPP_ #define VIENNACL_TOOLS_TOOLS_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/tools/tools.hpp @brief Various little tools used here and there in ViennaCL. 
*/ #include #include #include #include "viennacl/forwards.h" #include "viennacl/tools/adapter.hpp" #include #include namespace viennacl { namespace tools { /** \cond */ /** @brief Supply suitable increment functions for the iterators: */ template struct MATRIX_ITERATOR_INCREMENTER > { static void apply(const viennacl::matrix & /*mat*/, unsigned int & row, unsigned int & /*col*/) { ++row; } }; template struct MATRIX_ITERATOR_INCREMENTER > { static void apply(const viennacl::matrix & /*mat*/, unsigned int & /*row*/, unsigned int & col) { ++col; } }; /** \endcond */ /** @brief A guard that checks whether the floating point type of GPU types is either float or double */ template struct CHECK_SCALAR_TEMPLATE_ARGUMENT { typedef typename T::ERROR_SCALAR_MUST_HAVE_TEMPLATE_ARGUMENT_FLOAT_OR_DOUBLE ResultType; }; /** \cond */ template <> struct CHECK_SCALAR_TEMPLATE_ARGUMENT { typedef float ResultType; }; template <> struct CHECK_SCALAR_TEMPLATE_ARGUMENT { typedef double ResultType; }; /** \endcond */ /** @brief Reads a text from a file into a std::string * * @param filename The filename * @return The text read from the file */ inline std::string readTextFromFile(const std::string & filename) { std::ifstream f(filename.c_str()); if (!f) return std::string(); std::stringstream result; std::string tmp; while (std::getline(f, tmp)) result << tmp << std::endl; return result.str(); } /** @brief Replaces all occurances of a substring by another stringstream * * @param text The string to search in * @param to_search The substring to search for * @param to_replace The replacement for found substrings * @return The resulting string */ inline std::string strReplace(const std::string & text, std::string to_search, std::string to_replace) { std::string::size_type pos = 0; std::string result; std::string::size_type found; while( (found = text.find(to_search, pos)) != std::string::npos ) { result.append(text.substr(pos,found-pos)); result.append(to_replace); pos = found + 
to_search.length(); } if (pos < text.length()) result.append(text.substr(pos)); return result; } /** @brief Rounds an integer to the next multiple of another integer * * @tparam INT_TYPE The integer type * @param to_reach The integer to be rounded up (ceil operation) * @param base The base * @return The smallest multiple of 'base' such that to_reach <= base */ template INT_TYPE align_to_multiple(INT_TYPE to_reach, INT_TYPE base) { if (to_reach % base == 0) return to_reach; return ((to_reach / base) + 1) * base; } /** @brief Rounds an integer to the previous multiple of another integer * * @tparam INT_TYPE The integer type * @param to_reach The integer to be rounded down (floor operation) * @param base The base * @return The biggest multiple of 'base' such that to_reach >= base */ template INT_TYPE roundDownToPreviousMultiple(INT_TYPE to_reach, INT_TYPE base) { if (to_reach % base == 0) return to_reach; return (to_reach / base) * base; } /** @brief Replace in a source string a pattern by another * * @param source The source string * @param find String to find * @param replace String to replace */ int inline find_and_replace(std::string & source, std::string const & find, std::string const & replace) { int num=0; vcl_size_t fLen = find.size(); vcl_size_t rLen = replace.size(); for (vcl_size_t pos=0; (pos=source.find(find, pos))!=std::string::npos; pos+=rLen) { num++; source.replace(pos, fLen, replace); } return num; } /** @brief Create a double precision kernel out of a single precision kernel * * @param source The source string * @param fp_extension An info string that specifies the OpenCL double precision extension * @return The double precision kernel */ inline std::string make_double_kernel(std::string const & source, std::string const & fp_extension) { std::stringstream ss; ss << "#pragma OPENCL EXTENSION " << fp_extension << " : enable\n\n"; std::string result = ss.str(); result.append(strReplace(source, "float", "double")); return result; } /** @brief Removes 
the const qualifier from a type */ template struct CONST_REMOVER { typedef T ResultType; }; /** \cond */ template struct CONST_REMOVER { typedef T ResultType; }; /** \endcond */ /////// CPU scalar type deducer /////////// /** @brief Obtain the cpu scalar type from a type, including a GPU type like viennacl::scalar * * @tparam T Either a CPU scalar type or a GPU scalar type */ template struct CPU_SCALAR_TYPE_DEDUCER { //force compiler error if type cannot be deduced //typedef T ResultType; }; /** \cond */ template <> struct CPU_SCALAR_TYPE_DEDUCER< float > { typedef float ResultType; }; template <> struct CPU_SCALAR_TYPE_DEDUCER< double > { typedef double ResultType; }; template struct CPU_SCALAR_TYPE_DEDUCER< viennacl::scalar > { typedef T ResultType; }; template struct CPU_SCALAR_TYPE_DEDUCER< viennacl::vector > { typedef T ResultType; }; template struct CPU_SCALAR_TYPE_DEDUCER< viennacl::matrix > { typedef T ResultType; }; template struct CPU_SCALAR_TYPE_DEDUCER< viennacl::matrix_expression, const matrix, op_trans> > { typedef T ResultType; }; /** \endcond */ // // Converts a scalar type when necessary unless it is a viennacl::scalar<> (typical use-case: convert user-provided floats to double (and vice versa) for OpenCL kernels) // template viennacl::scalar const & promote_if_host_scalar(viennacl::scalar const & s) { return s; } template viennacl::scalar_expression, const viennacl::scalar, viennacl::op_flip_sign> const & promote_if_host_scalar(viennacl::scalar_expression, const viennacl::scalar, viennacl::op_flip_sign> const & s) { return s; } template HostScalarType promote_if_host_scalar(float s) { return s; } template HostScalarType promote_if_host_scalar(double s) { return s; } template HostScalarType promote_if_host_scalar(long s) { return s; } template HostScalarType promote_if_host_scalar(unsigned long s) { return s; } template HostScalarType promote_if_host_scalar(int s) { return s; } template HostScalarType promote_if_host_scalar(unsigned int s) { return 
s; } } //namespace tools } //namespace viennacl #endif ViennaCL-1.5.1-src/viennacl/tools/adapter.hpp000644 001750 001750 00000035204 12267307531 021062 0ustar00rupprupp000000 000000 #ifndef VIENNACL_TOOLS_ADAPTER_HPP_ #define VIENNACL_TOOLS_ADAPTER_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/tools/adapter.hpp @brief Adapter classes for sparse matrices made of the STL type std::vector > */ #include #include #include #include #include "viennacl/forwards.h" #include #include namespace viennacl { namespace tools { /** @brief A const iterator for sparse matrices of type std::vector > * * The iterator behaves like ublas iterators. Attention: Iteration along first columns and then rows via .begin() is untested! 
* * @tparam SCALARTYPE either float or double * @tparam is_iterator1 if true, this iterator iterates along increasing row indices, otherwise along increasing column indices * @tparam increment if +1, this is a forward iterator, if -1 we have a reverse iterator */ template class const_sparse_matrix_adapted_iterator { typedef const_sparse_matrix_adapted_iterator self_type; public: typedef self_type iterator1; typedef self_type iterator2; typedef vcl_size_t size_type; const_sparse_matrix_adapted_iterator(std::vector > const & mat, int i, int j) : mat_(mat), i_(i), j_(j) { if (i < 0) //reverse iterator end { //iter2 = mat_[0].rend(); //reverse iterator end } else //i_ is valid { if (j < 0) { //iter2 = mat_[i].rend(); } else //j_ is valid { if (i_ < mat_.size() && mat_[i].size() > 0 ) { //TODO: Start at entry j, not at the beginning if (static_cast(mat_[i].rbegin()->first) < j) iter2 = mat_[i].end(); else iter2 = mat_[i].begin(); } else if (i_ < mat_.size() && mat_[i].size() == 0) iter2 = mat_[i].end(); else //i is out of range -> end iterator requested iter2 = mat_.back().end(); //forward iterator end } } } SCALARTYPE operator*(void) const { if (is_iterator1) { typedef typename std::map::const_iterator col_iterator; col_iterator colit = mat_[i_].find(static_cast(j_)); if (colit != mat_[i_].end()) return colit->second; return 0.0; } else return iter2->second; } self_type & operator++(void) { if (is_iterator1) { if (is_forward) ++i_; else --i_; } else ++iter2; return *this; } self_type operator++(int) { self_type tmp = *this; ++(*this); return tmp; } self_type operator+=(SizeType offset) { if (is_iterator1) { if (is_forward) i_ += offset; else i_ -= offset; } else { for (SizeType k=0; kfirst; } const_sparse_matrix_adapted_iterator begin() const { return const_sparse_matrix_adapted_iterator(mat_, static_cast(i_), 0); } const_sparse_matrix_adapted_iterator end() const { int end_ = static_cast(mat_[i_].size()); if (end_ > 0) end_ = mat_[i_].rbegin()->first; return 
const_sparse_matrix_adapted_iterator(mat_, static_cast(i_), end_ + 1); } private: std::vector > const & mat_; typename std::map::const_iterator iter2; size_type i_; size_type j_; }; /** @brief Adapts a constant sparse matrix type made up from std::vector > to basic ublas-compatibility. * * @tparam SCALARTYPE either float or double */ template class const_sparse_matrix_adapter { public: typedef const_sparse_matrix_adapted_iterator const_iterator1; typedef const_sparse_matrix_adapted_iterator const_iterator2; typedef const_sparse_matrix_adapted_iterator const_reverse_iterator1; typedef SCALARTYPE value_type; typedef vcl_size_t size_type; const_sparse_matrix_adapter(std::vector > const & mat) : mat_(mat), size1_(mat_.size()), size2_(mat_.size()) {} const_sparse_matrix_adapter(std::vector > const & mat, size_type num_rows, size_type num_cols) : mat_(mat), size1_(num_rows), size2_(num_cols) {} size_type size1() const { return size1_; } size_type size2() const { return size2_; } const_iterator1 begin1() const { return const_iterator1(mat_, 0, 0); } const_iterator1 end1() const { return const_iterator1(mat_, static_cast(size1()), static_cast(size2())); } const_reverse_iterator1 rbegin1() const { return const_reverse_iterator1(mat_, static_cast(size1() - 1), 0); } const_reverse_iterator1 rend1() const { return const_reverse_iterator1(mat_, -1, static_cast(size2())); } const_iterator2 begin2() const { return const_iterator2(mat_, 0, 0); } const_iterator2 end2() const { return const_iterator2(mat_, size1(), size2()); } SCALARTYPE operator()(SizeType i, SizeType j) const { typedef typename std::map::const_iterator col_iterator; col_iterator colit = mat_[i].find(j); if (colit != mat_[i].end()) return colit->second; return 0.0; } private: std::vector > const & mat_; size_type size1_; size_type size2_; }; /** @brief A non-const iterator for sparse matrices of type std::vector > * * The iterator behaves like ublas iterators. 
Attention: Iteration along first columns and then rows via .begin() is untested! Reverse iterators are missing! * * @tparam SCALARTYPE either float or double * @tparam is_iterator1 if true, this iterator iterates along increasing row indices, otherwise along increasiong column indices */ template class sparse_matrix_adapted_iterator { typedef sparse_matrix_adapted_iterator self_type; public: typedef self_type iterator1; typedef self_type iterator2; typedef vcl_size_t size_type; sparse_matrix_adapted_iterator(std::vector > & mat, int i, int j) : mat_(mat), i_(i), j_(j) { if (i < 0) //reverse iterator end { //iter2 = mat_[0].rend(); //reverse iterator end } else //_i is valid { if (j < 0) { //iter2 = mat[i]_.rend(); } else //_j is valid { if (i_ < mat_.size() && mat_[i].size() > 0 ) { //TODO: Start at entry j, not at the beginning if (static_cast(mat_[i].rbegin()->first) < j) iter2 = mat_[i].end(); else iter2 = mat_[i].begin(); } else if (i_ < mat_.size() && mat_[i].size() == 0) iter2 = mat_[i].end(); else //i is out of range -> end iterator requested iter2 = mat_.back().end(); //forward iterator end } } } SCALARTYPE & operator*(void) { if (is_iterator1) { return mat_[i_][static_cast(j_)]; } else return iter2->second; } self_type & operator++(void) { if (is_iterator1) ++i_; else ++iter2; return *this; } self_type operator++(int) { self_type tmp = *this; ++(*this); return tmp; } self_type operator+=(size_type offset) { if (is_iterator1) i_ += offset; else { for (size_type k=0; kfirst; } sparse_matrix_adapted_iterator begin() const { return sparse_matrix_adapted_iterator(mat_, static_cast(i_), 0); } sparse_matrix_adapted_iterator end() const { int end_ = static_cast(mat_[i_].size()); if (end_ > 0) end_ = mat_[i_].rbegin()->first; return sparse_matrix_adapted_iterator(mat_, static_cast(i_), end_ + 1); } private: std::vector > & mat_; typename std::map::iterator iter2; size_type i_; size_type j_; }; /** @brief Adapts a non-const sparse matrix type made up from 
std::vector > to basic ublas-compatibility. * * @tparam SCALARTYPE either float or double */ template class sparse_matrix_adapter : public const_sparse_matrix_adapter { typedef const_sparse_matrix_adapter BaseType; public: typedef sparse_matrix_adapted_iterator iterator1; typedef sparse_matrix_adapted_iterator iterator2; typedef const_sparse_matrix_adapted_iterator const_iterator1; typedef const_sparse_matrix_adapted_iterator const_iterator2; typedef SizeType size_type; sparse_matrix_adapter(std::vector > & mat) : BaseType(mat), mat_(mat), size1_(mat_.size()), size2_(mat_.size()) {} sparse_matrix_adapter(std::vector > & mat, vcl_size_t num_rows, vcl_size_t num_cols) : BaseType(mat, num_rows, num_cols), mat_(mat), size1_(static_cast(num_rows)), size2_(static_cast(num_cols)) {} iterator1 begin1() { return iterator1(mat_, 0, 0); } iterator1 end1() { return iterator1(mat_, static_cast(mat_.size()), static_cast(mat_.back().size())); } const_iterator1 begin1() const { return const_iterator1(mat_, 0, 0); } const_iterator1 end1() const { return const_iterator1(mat_, size1(), size2()); } iterator2 begin2() { return iterator2(mat_, 0, 0); } iterator2 end2() { return iterator2(mat_, mat_.size(), mat_.back().size()); } const_iterator2 begin2() const { return const_iterator2(mat_, 0, 0); } const_iterator2 end2() const { return const_iterator2(mat_, size1(), size2()); } SCALARTYPE & operator()(vcl_size_t i, vcl_size_t j) { return mat_[i][static_cast(j)]; } void resize(vcl_size_t i, vcl_size_t j, bool preserve = true) { if (i>0) mat_.resize(i); if (!preserve) clear(); size1_ = static_cast(i); size2_ = static_cast(j); } void clear() { for (size_type i=0; i > & mat_; size_type size1_; size_type size2_; }; } } #endif ViennaCL-1.5.1-src/viennacl/tools/entry_proxy.hpp000644 001750 001750 00000017225 12267307531 022047 0ustar00rupprupp000000 000000 #ifndef VIENNACL_TOOLS_ENTRY_PROXY_HPP_ #define VIENNACL_TOOLS_ENTRY_PROXY_HPP_ /* 
========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/tools/entry_proxy.hpp @brief A proxy class for entries in a vector */ #include "viennacl/forwards.h" #include "viennacl/scalar.hpp" namespace viennacl { //proxy class for single vector entries (this is a slow operation!!) /** * @brief A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-users of the library. * * This proxy provides access to a single entry of a vector. If the element is assigned to a GPU object, no unnecessary transfers to the CPU and back to GPU are initiated. * * @tparam SCALARTYPE Either float or double */ template class entry_proxy { public: typedef viennacl::backend::mem_handle handle_type; /** @brief The constructor for the proxy class. Declared explicit to avoid any surprises created by the compiler. * * @param mem_offset The memory offset in multiples of sizeof(SCALARTYPE) relative to the memory pointed to by the handle * @param mem_handle A viennacl::ocl::handle for the memory buffer on the GPU. 
*/ explicit entry_proxy(vcl_size_t mem_offset, handle_type & mem_handle) : index_(mem_offset), mem_handle_(mem_handle) {} //operators: /** @brief Inplace addition of a CPU floating point value */ entry_proxy & operator+=(SCALARTYPE value) { SCALARTYPE temp = read(); temp += value; write(temp); return *this; } /** @brief Inplace subtraction of a CPU floating point value */ entry_proxy & operator-=(SCALARTYPE value) { SCALARTYPE temp = read(); temp -= value; write(temp); return *this; } /** @brief Inplace multiplication with a CPU floating point value */ entry_proxy & operator*=(SCALARTYPE value) { SCALARTYPE temp = read(); temp *= value; write(temp); return *this; } /** @brief Inplace division by a CPU floating point value */ entry_proxy & operator/=(SCALARTYPE value) { SCALARTYPE temp = read(); temp /= value; write(temp); return *this; } /** @brief Assignment of a CPU floating point value */ entry_proxy & operator=(SCALARTYPE value) { write(value); return *this; } /** @brief Assignment of a GPU floating point value. Avoids unnecessary GPU->CPU->GPU transfers */ entry_proxy & operator=(scalar const & value) { viennacl::backend::memory_copy(value.handle(), mem_handle_, 0, sizeof(SCALARTYPE)*index_, sizeof(SCALARTYPE)); return *this; } /** @brief Assignment of another GPU value. */ entry_proxy & operator=(entry_proxy const & other) { viennacl::backend::memory_copy(other.handle(), mem_handle_, sizeof(SCALARTYPE) * other.index_, sizeof(SCALARTYPE)*index_, sizeof(SCALARTYPE)); return *this; } //type conversion: // allows to write something like: // double test = vector(4); /** @brief Conversion to a CPU floating point value. * * This conversion allows to write something like * double test = vector(4); * However, one has to keep in mind that CPU<->GPU transfers are very slow compared to CPU<->CPU operations. 
*/ operator SCALARTYPE () const { SCALARTYPE temp = read(); return temp; } /** @brief Returns the index of the represented element */ vcl_size_t index() const { return index_; } /** @brief Returns the memory viennacl::ocl::handle */ handle_type const & handle() const { return mem_handle_; } private: /** @brief Reads an element from the GPU to the CPU */ SCALARTYPE read() const { SCALARTYPE temp; viennacl::backend::memory_read(mem_handle_, sizeof(SCALARTYPE)*index_, sizeof(SCALARTYPE), &temp); return temp; } /** @brief Writes a floating point value to the GPU */ void write(SCALARTYPE value) { viennacl::backend::memory_write(mem_handle_, sizeof(SCALARTYPE)*index_, sizeof(SCALARTYPE), &value); } vcl_size_t index_; viennacl::backend::mem_handle & mem_handle_; }; //entry_proxy /** * @brief A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-users of the library. * * This proxy provides access to a single entry of a vector. If the element is assigned to a GPU object, no unnecessary transfers to the CPU and back to GPU are initiated. * * @tparam SCALARTYPE Either float or double */ template class const_entry_proxy { typedef const_entry_proxy self_type; public: typedef viennacl::backend::mem_handle handle_type; /** @brief The constructor for the proxy class. Declared explicit to avoid any surprises created by the compiler. * * @param mem_offset The memory offset in multiples of sizeof(SCALARTYPE) relative to the memory pointed to by the handle * @param mem_handle A viennacl::ocl::handle for the memory buffer on the GPU. */ explicit const_entry_proxy(vcl_size_t mem_offset, handle_type const & mem_handle) : index_(mem_offset), mem_handle_(mem_handle) {} //type conversion: // allows to write something like: // double test = vector(4); /** @brief Conversion to a CPU floating point value. 
* * This conversion allows to write something like * double test = vector(4); * However, one has to keep in mind that CPU<->GPU transfers are very slow compared to CPU<->CPU operations. */ operator SCALARTYPE () const { SCALARTYPE temp = read(); return temp; } /** @brief Returns the index of the represented element */ unsigned int index() const { return index_; } /** @brief Returns the memory handle */ handle_type const & handle() const { return mem_handle_; } private: /** @brief Reads an element from the GPU to the CPU */ SCALARTYPE read() const { SCALARTYPE temp; viennacl::backend::memory_read(mem_handle_, sizeof(SCALARTYPE)*index_, sizeof(SCALARTYPE), &temp); return temp; } vcl_size_t index_; viennacl::backend::mem_handle const & mem_handle_; }; //entry_proxy } #endif ViennaCL-1.5.1-src/viennacl/matrix_proxy.hpp000644 001750 001750 00000047337 12267307531 021061 0ustar00rupprupp000000 000000 #ifndef VIENNACL_MATRIX_PROXY_HPP_ #define VIENNACL_MATRIX_PROXY_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file matrix_proxy.hpp @brief Proxy classes for matrices. */ #include "viennacl/forwards.h" #include "viennacl/range.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/matrix_operations.hpp" namespace viennacl { /** @brief Class for representing non-strided submatrices of a bigger matrix A. * * In MATLAB notation, this could for example refer to the submatrix A(3:8, 6:10) of a matrix A. 
*/ template class matrix_range : public matrix_base { typedef matrix_base base_type; typedef matrix_range self_type; public: typedef typename MatrixType::orientation_category orientation_category; typedef typename MatrixType::value_type value_type; typedef typename viennacl::result_of::cpu_value_type::type cpu_value_type; typedef range::size_type size_type; typedef range::difference_type difference_type; typedef value_type reference; typedef const value_type & const_reference; matrix_range(MatrixType & A, range const & row_range, range const & col_range) : base_type(A.handle(), row_range.size(), row_range.start(), 1, A.internal_size1(), col_range.size(), col_range.start(), 1, A.internal_size2()) {} using base_type::operator=; }; ///////////////////////////////////////////////////////////// ///////////////////////// CPU to GPU //////////////////////// ///////////////////////////////////////////////////////////// //row_major: template void copy(const CPU_MATRIX & cpu_matrix, matrix_range > & gpu_matrix_range ) { assert( (cpu_matrix.size1() == gpu_matrix_range.size1()) && (cpu_matrix.size2() == gpu_matrix_range.size2()) && bool("Matrix size mismatch!")); if ( gpu_matrix_range.start2() != 0) { std::vector entries(gpu_matrix_range.size2()); //copy each stride separately: for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i) { for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j) entries[j] = cpu_matrix(i,j); vcl_size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.internal_size2() + gpu_matrix_range.start2(); vcl_size_t num_entries = gpu_matrix_range.size2(); viennacl::backend::memory_write(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0])); //std::cout << "Strided copy worked!" 
<< std::endl; } } else { //full block can be copied: std::vector entries(gpu_matrix_range.size1()*gpu_matrix_range.internal_size2()); //copy each stride separately: for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i) for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j) entries[i*gpu_matrix_range.internal_size2() + j] = cpu_matrix(i,j); vcl_size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.internal_size2(); vcl_size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.internal_size2(); viennacl::backend::memory_write(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0])); //std::cout << "Block copy worked!" << std::endl; } } //column_major: template void copy(const CPU_MATRIX & cpu_matrix, matrix_range > & gpu_matrix_range ) { assert( (cpu_matrix.size1() == gpu_matrix_range.size1()) && (cpu_matrix.size2() == gpu_matrix_range.size2()) && bool("Matrix size mismatch!")); if ( gpu_matrix_range.start1() != 0 || gpu_matrix_range.size1() != gpu_matrix_range.size1()) { std::vector entries(gpu_matrix_range.size1()); //copy each stride separately: for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j) { for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i) entries[i] = cpu_matrix(i,j); vcl_size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.internal_size1() + gpu_matrix_range.start1(); vcl_size_t num_entries = gpu_matrix_range.size1(); viennacl::backend::memory_write(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0])); //std::cout << "Strided copy worked!" 
<< std::endl; } } else { //full block can be copied: std::vector entries(gpu_matrix_range.internal_size1()*gpu_matrix_range.size2()); //copy each stride separately: for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i) for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j) entries[i + j*gpu_matrix_range.internal_size1()] = cpu_matrix(i,j); vcl_size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.internal_size1(); vcl_size_t num_entries = gpu_matrix_range.internal_size1() * gpu_matrix_range.size2(); viennacl::backend::memory_write(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0])); //std::cout << "Block copy worked!" << std::endl; } } ///////////////////////////////////////////////////////////// ///////////////////////// GPU to CPU //////////////////////// ///////////////////////////////////////////////////////////// //row_major: template void copy(matrix_range > const & gpu_matrix_range, CPU_MATRIX & cpu_matrix) { assert( (cpu_matrix.size1() == gpu_matrix_range.size1()) && (cpu_matrix.size2() == gpu_matrix_range.size2()) && bool("Matrix size mismatch!")); if ( gpu_matrix_range.start2() != 0) { std::vector entries(gpu_matrix_range.size2()); //copy each stride separately: for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i) { vcl_size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.internal_size2() + gpu_matrix_range.start2(); vcl_size_t num_entries = gpu_matrix_range.size2(); viennacl::backend::memory_read(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0])); //std::cout << "Strided copy worked!" 
<< std::endl; for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j) cpu_matrix(i,j) = entries[j]; } } else { //full block can be copied: std::vector entries(gpu_matrix_range.size1()*gpu_matrix_range.internal_size2()); vcl_size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.internal_size2(); vcl_size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.size2(); viennacl::backend::memory_read(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0])); //std::cout << "Block copy worked!" << std::endl; for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i) for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j) cpu_matrix(i,j) = entries[i*gpu_matrix_range.internal_size2() + j]; } } //column_major: template void copy(matrix_range > const & gpu_matrix_range, CPU_MATRIX & cpu_matrix) { assert( (cpu_matrix.size1() == gpu_matrix_range.size1()) && (cpu_matrix.size2() == gpu_matrix_range.size2()) && bool("Matrix size mismatch!")); if ( gpu_matrix_range.start1() != 0) { std::vector entries(gpu_matrix_range.size1()); //copy each stride separately: for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j) { vcl_size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.internal_size1() + gpu_matrix_range.start1(); vcl_size_t num_entries = gpu_matrix_range.size1(); viennacl::backend::memory_read(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0])); //std::cout << "Strided copy worked!" 
<< std::endl; for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i) cpu_matrix(i,j) = entries[i]; } } else { //full block can be copied: std::vector entries(gpu_matrix_range.internal_size1()*gpu_matrix_range.size2()); //copy each stride separately: vcl_size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.internal_size1(); vcl_size_t num_entries = gpu_matrix_range.internal_size1() * gpu_matrix_range.size2(); viennacl::backend::memory_read(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0])); //std::cout << "Block copy worked!" << std::endl; for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i) for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j) cpu_matrix(i,j) = entries[i + j*gpu_matrix_range.internal_size1()]; } } // // Convenience function // template matrix_range project(MatrixType & A, viennacl::range const & r1, viennacl::range const & r2) { assert(r1.size() <= A.size1() && r2.size() <= A.size2() && bool("Size of range invalid!")); return matrix_range(A, r1, r2); } template matrix_range project(matrix_range & A, viennacl::range const & r1, viennacl::range const & r2) { assert(r1.size() <= A.size1() && r2.size() <= A.size2() && bool("Size of range invalid!")); return matrix_range(A, viennacl::range(A.start1() + r1.start(), A.start1() + r1.start() + r1.size()), viennacl::range(A.start2() + r2.start(), A.start2() + r2.start() + r2.size()) ); } // // // /////////////////////////////// Slice ///////////////////////////////////////////// // // // /** @brief Class for representing strided submatrices of a bigger matrix A. * * In MATLAB notation, this could for example refer to the submatrix A(3:2:8, 6:3:16) of a matrix A. 
*/ template class matrix_slice : public matrix_base { typedef matrix_base base_type; typedef matrix_slice self_type; public: typedef typename MatrixType::orientation_category orientation_category; typedef typename MatrixType::value_type value_type; typedef typename viennacl::result_of::cpu_value_type::type cpu_value_type; typedef range::size_type size_type; typedef range::difference_type difference_type; typedef value_type reference; typedef const value_type & const_reference; matrix_slice(MatrixType & A, slice const & row_slice, slice const & col_slice) : base_type(A.handle(), row_slice.size(), row_slice.start(), row_slice.stride(), A.internal_size1(), col_slice.size(), col_slice.start(), col_slice.stride(), A.internal_size2()) {} using base_type::operator=; }; ///////////////////////////////////////////////////////////// ///////////////////////// CPU to GPU //////////////////////// ///////////////////////////////////////////////////////////// //row_major: template void copy(const CPU_MATRIX & cpu_matrix, matrix_slice > & gpu_matrix_slice ) { assert( (cpu_matrix.size1() == gpu_matrix_slice.size1()) && (cpu_matrix.size2() == gpu_matrix_slice.size2()) && bool("Matrix size mismatch!")); if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size1() > 0) ) { vcl_size_t num_entries = gpu_matrix_slice.size2() * gpu_matrix_slice.stride2(); //no. 
of entries per stride std::vector entries(num_entries); //copy each stride separately: for (vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i) { vcl_size_t start_offset = (gpu_matrix_slice.start1() + i * gpu_matrix_slice.stride1()) * gpu_matrix_slice.internal_size2() + gpu_matrix_slice.start2(); viennacl::backend::memory_read(gpu_matrix_slice.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0])); for (vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j) entries[j * gpu_matrix_slice.stride2()] = cpu_matrix(i,j); viennacl::backend::memory_write(gpu_matrix_slice.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0])); } } } //column_major: template void copy(const CPU_MATRIX & cpu_matrix, matrix_slice > & gpu_matrix_slice ) { assert( (cpu_matrix.size1() == gpu_matrix_slice.size1()) && (cpu_matrix.size2() == gpu_matrix_slice.size2()) && bool("Matrix size mismatch!")); if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size1() > 0) ) { vcl_size_t num_entries = gpu_matrix_slice.size1() * gpu_matrix_slice.stride1(); //no. 
of entries per stride std::vector entries(num_entries); //copy each column stride separately: for (vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j) { vcl_size_t start_offset = gpu_matrix_slice.start1() + (gpu_matrix_slice.start2() + j * gpu_matrix_slice.stride2()) * gpu_matrix_slice.internal_size1(); viennacl::backend::memory_read(gpu_matrix_slice.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0])); for (vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i) entries[i * gpu_matrix_slice.stride1()] = cpu_matrix(i,j); viennacl::backend::memory_write(gpu_matrix_slice.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0])); } } } ///////////////////////////////////////////////////////////// ///////////////////////// GPU to CPU //////////////////////// ///////////////////////////////////////////////////////////// //row_major: template void copy(matrix_slice > const & gpu_matrix_slice, CPU_MATRIX & cpu_matrix) { assert( (cpu_matrix.size1() == gpu_matrix_slice.size1()) && (cpu_matrix.size2() == gpu_matrix_slice.size2()) && bool("Matrix size mismatch!")); if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size1() > 0) ) { vcl_size_t num_entries = gpu_matrix_slice.size2() * gpu_matrix_slice.stride2(); //no. 
of entries per stride std::vector entries(num_entries); //copy each stride separately: for (vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i) { vcl_size_t start_offset = (gpu_matrix_slice.start1() + i * gpu_matrix_slice.stride1()) * gpu_matrix_slice.internal_size2() + gpu_matrix_slice.start2(); viennacl::backend::memory_read(gpu_matrix_slice.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0])); for (vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j) cpu_matrix(i,j) = entries[j * gpu_matrix_slice.stride2()]; } } } //column_major: template void copy(matrix_slice > const & gpu_matrix_slice, CPU_MATRIX & cpu_matrix) { assert( (cpu_matrix.size1() == gpu_matrix_slice.size1()) && (cpu_matrix.size2() == gpu_matrix_slice.size2()) && bool("Matrix size mismatch!")); if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size1() > 0) ) { vcl_size_t num_entries = gpu_matrix_slice.size1() * gpu_matrix_slice.stride1(); //no. of entries per stride std::vector entries(num_entries); //copy each column stride separately: for (vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j) { vcl_size_t start_offset = gpu_matrix_slice.start1() + (gpu_matrix_slice.start2() + j * gpu_matrix_slice.stride2()) * gpu_matrix_slice.internal_size1(); viennacl::backend::memory_read(gpu_matrix_slice.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0])); for (vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i) cpu_matrix(i,j) = entries[i * gpu_matrix_slice.stride1()]; } } } // // Convenience function // template matrix_slice project(MatrixType & A, viennacl::slice const & r1, viennacl::slice const & r2) { assert(r1.size() <= A.size1() && r2.size() <= A.size2() && bool("Size of slice invalid!")); return matrix_slice(A, r1, r2); } template matrix_slice project(matrix_range & A, viennacl::slice const & r1, viennacl::slice const & r2) { assert(r1.size() <= A.size1() && r2.size() <= A.size2() && bool("Size of slice invalid!")); return 
matrix_slice(A, viennacl::slice(A.start1() + r1.start(), r1.stride(), r1.size()), viennacl::slice(A.start2() + r2.start(), r2.stride(), r2.size()) ); } template matrix_slice project(matrix_slice & A, viennacl::slice const & r1, viennacl::slice const & r2) { assert(r1.size() <= A.size1() && r2.size() <= A.size2() && bool("Size of slice invalid!")); return matrix_slice(A, viennacl::slice(A.start1() + r1.start(), A.stride1() * r1.stride(), r1.size()), viennacl::slice(A.start2() + r2.start(), A.stride2() * r2.stride(), r2.size()) ); } // TODO: Allow mix of range/slice } #endif ViennaCL-1.5.1-src/viennacl/coordinate_matrix.hpp000644 001750 001750 00000052641 12267307531 022021 0ustar00rupprupp000000 000000 #ifndef VIENNACL_COORDINATE_MATRIX_HPP_ #define VIENNACL_COORDINATE_MATRIX_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/coordinate_matrix.hpp @brief Implementation of the coordinate_matrix class */ #include #include #include #include "viennacl/forwards.h" #include "viennacl/vector.hpp" #include "viennacl/linalg/sparse_matrix_operations.hpp" namespace viennacl { //provide copy-operation: /** @brief Copies a sparse matrix from the host to the OpenCL device (either GPU or multi-core CPU) * * For the requirements on the CPU_MATRIX type, see the documentation of the function copy(CPU_MATRIX, compressed_matrix<>) * * @param cpu_matrix A sparse matrix on the host. 
* @param gpu_matrix A compressed_matrix from ViennaCL */ template void copy(const CPU_MATRIX & cpu_matrix, coordinate_matrix & gpu_matrix ) { assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); vcl_size_t group_num = 64; // Step 1: Determine nonzeros: if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 ) { vcl_size_t num_entries = 0; for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) { for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) { ++num_entries; } } // Step 2: Set up matrix data: gpu_matrix.nonzeros_ = num_entries; gpu_matrix.rows_ = cpu_matrix.size1(); gpu_matrix.cols_ = cpu_matrix.size2(); viennacl::backend::typesafe_host_array group_boundaries(gpu_matrix.handle3(), group_num + 1); viennacl::backend::typesafe_host_array coord_buffer(gpu_matrix.handle12(), 2*gpu_matrix.internal_nnz()); std::vector elements(gpu_matrix.internal_nnz()); vcl_size_t data_index = 0; vcl_size_t current_fraction = 0; group_boundaries.set(0, 0); for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) { for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) { coord_buffer.set(2*data_index, col_it.index1()); coord_buffer.set(2*data_index + 1, col_it.index2()); elements[data_index] = *col_it; ++data_index; } while (data_index > (current_fraction + 1) / static_cast(group_num) * num_entries) //split data equally over 64 groups group_boundaries.set(++current_fraction, data_index); } //write end of last group: group_boundaries.set(group_num, data_index); //group_boundaries[1] = data_index; //for one compute unit //std::cout << "Group boundaries: " << std::endl; //for (vcl_size_t i=0; i > 
format to an OpenCL device. * * @param cpu_matrix A sparse square matrix on the host. * @param gpu_matrix A coordinate_matrix from ViennaCL */ template void copy(const std::vector< std::map > & cpu_matrix, coordinate_matrix & gpu_matrix ) { copy(tools::const_sparse_matrix_adapter(cpu_matrix, cpu_matrix.size(), cpu_matrix.size()), gpu_matrix); } //gpu to cpu: /** @brief Copies a sparse matrix from the OpenCL device (either GPU or multi-core CPU) to the host. * * There are two type requirements on the CPU_MATRIX type (fulfilled by e.g. boost::numeric::ublas): * - resize(rows, cols) A resize function to bring the matrix into the correct size * - operator(i,j) Write new entries via the parenthesis operator * * @param gpu_matrix A coordinate_matrix from ViennaCL * @param cpu_matrix A sparse matrix on the host. */ template void copy(const coordinate_matrix & gpu_matrix, CPU_MATRIX & cpu_matrix ) { assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 ) { //get raw data from memory: viennacl::backend::typesafe_host_array coord_buffer(gpu_matrix.handle12(), 2*gpu_matrix.nnz()); std::vector elements(gpu_matrix.nnz()); //std::cout << "GPU nonzeros: " << gpu_matrix.nnz() << std::endl; viennacl::backend::memory_read(gpu_matrix.handle12(), 0, coord_buffer.raw_size(), coord_buffer.get()); viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE) * elements.size(), &(elements[0])); //fill the cpu_matrix: for (vcl_size_t index = 0; index < gpu_matrix.nnz(); ++index) cpu_matrix(coord_buffer[2*index], coord_buffer[2*index+1]) = elements[index]; } } /** @brief Copies a sparse matrix from an OpenCL device to the host. The host type is the std::vector< std::map < > > format . * * @param gpu_matrix A coordinate_matrix from ViennaCL * @param cpu_matrix A sparse matrix on the host. 
*/ template void copy(const coordinate_matrix & gpu_matrix, std::vector< std::map > & cpu_matrix) { tools::sparse_matrix_adapter temp(cpu_matrix, gpu_matrix.size1(), gpu_matrix.size2()); copy(gpu_matrix, temp); } //////////////////////// coordinate_matrix ////////////////////////// /** @brief A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row and column indices and val denotes the entry. * * The present implementation of coordinate_matrix suffers from poor runtime efficiency. Users are adviced to use compressed_matrix in the meanwhile. * * @tparam SCALARTYPE The floating point type (either float or double, checked at compile time) * @tparam ALIGNMENT The internal memory size for the arrays, given by (size()/ALIGNMENT + 1) * ALIGNMENT. ALIGNMENT must be a power of two. */ template class coordinate_matrix { public: typedef viennacl::backend::mem_handle handle_type; typedef scalar::ResultType> value_type; typedef vcl_size_t size_type; /** @brief Default construction of a coordinate matrix. No memory is allocated */ coordinate_matrix() : rows_(0), cols_(0), nonzeros_(0), group_num_(64) {} explicit coordinate_matrix(viennacl::context ctx) : rows_(0), cols_(0), nonzeros_(0), group_num_(64) { group_boundaries_.switch_active_handle_id(ctx.memory_type()); coord_buffer_.switch_active_handle_id(ctx.memory_type()); elements_.switch_active_handle_id(ctx.memory_type()); #ifdef VIENNACL_WITH_OPENCL if (ctx.memory_type() == OPENCL_MEMORY) { group_boundaries_.opencl_handle().context(ctx.opencl_context()); coord_buffer_.opencl_handle().context(ctx.opencl_context()); elements_.opencl_handle().context(ctx.opencl_context()); } #endif } /** @brief Construction of a coordinate matrix with the supplied number of rows and columns. 
If the number of nonzeros is positive, memory is allocated * * @param rows Number of rows * @param cols Number of columns * @param nonzeros Optional number of nonzeros for memory preallocation * @param ctx Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host) */ coordinate_matrix(vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzeros = 0, viennacl::context ctx = viennacl::context()) : rows_(rows), cols_(cols), nonzeros_(nonzeros) { if (nonzeros > 0) { viennacl::backend::memory_create(group_boundaries_, viennacl::backend::typesafe_host_array().element_size() * (group_num_ + 1), ctx); viennacl::backend::memory_create(coord_buffer_, viennacl::backend::typesafe_host_array().element_size() * 2 * internal_nnz(), ctx); viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE) * internal_nnz(), ctx); } else { group_boundaries_.switch_active_handle_id(ctx.memory_type()); coord_buffer_.switch_active_handle_id(ctx.memory_type()); elements_.switch_active_handle_id(ctx.memory_type()); #ifdef VIENNACL_WITH_OPENCL if (ctx.memory_type() == OPENCL_MEMORY) { group_boundaries_.opencl_handle().context(ctx.opencl_context()); coord_buffer_.opencl_handle().context(ctx.opencl_context()); elements_.opencl_handle().context(ctx.opencl_context()); } #endif } } /** @brief Construction of a coordinate matrix with the supplied number of rows and columns in the supplied context. Does not yet allocate memory. 
* * @param rows Number of rows * @param cols Number of columns * @param ctx Context in which to create the matrix */ explicit coordinate_matrix(vcl_size_t rows, vcl_size_t cols, viennacl::context ctx) : rows_(rows), cols_(cols), nonzeros_(0) { group_boundaries_.switch_active_handle_id(ctx.memory_type()); coord_buffer_.switch_active_handle_id(ctx.memory_type()); elements_.switch_active_handle_id(ctx.memory_type()); #ifdef VIENNACL_WITH_OPENCL if (ctx.memory_type() == OPENCL_MEMORY) { group_boundaries_.opencl_handle().context(ctx.opencl_context()); coord_buffer_.opencl_handle().context(ctx.opencl_context()); elements_.opencl_handle().context(ctx.opencl_context()); } #endif } /** @brief Allocate memory for the supplied number of nonzeros in the matrix. Old values are preserved. */ void reserve(vcl_size_t new_nonzeros) { if (new_nonzeros > nonzeros_) //TODO: Do we need to initialize new memory with zero? { handle_type coord_buffer_old; handle_type elements_old; viennacl::backend::memory_shallow_copy(coord_buffer_, coord_buffer_old); viennacl::backend::memory_shallow_copy(elements_, elements_old); vcl_size_t internal_new_nnz = viennacl::tools::align_to_multiple(new_nonzeros, ALIGNMENT); viennacl::backend::typesafe_host_array size_deducer(coord_buffer_); viennacl::backend::memory_create(coord_buffer_, size_deducer.element_size() * 2 * internal_new_nnz, viennacl::traits::context(coord_buffer_)); viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE) * internal_new_nnz, viennacl::traits::context(elements_)); viennacl::backend::memory_copy(coord_buffer_old, coord_buffer_, 0, 0, size_deducer.element_size() * 2 * nonzeros_); viennacl::backend::memory_copy(elements_old, elements_, 0, 0, sizeof(SCALARTYPE) * nonzeros_); nonzeros_ = new_nonzeros; } } /** @brief Resize the matrix. * * @param new_size1 New number of rows * @param new_size2 New number of columns * @param preserve If true, the old values are preserved. At present, old values are always discarded. 
*/ void resize(vcl_size_t new_size1, vcl_size_t new_size2, bool preserve = true) { assert (new_size1 > 0 && new_size2 > 0); if (new_size1 < rows_ || new_size2 < cols_) //shrinking in at least one dimension: crop entries outside the new bounds via an STL copy { std::vector > stl_sparse_matrix; if (rows_ > 0) stl_sparse_matrix.resize(rows_); if (preserve && rows_ > 0) viennacl::copy(*this, stl_sparse_matrix); stl_sparse_matrix.resize(new_size1); //std::cout << "Cropping STL matrix of size " << stl_sparse_matrix.size() << std::endl; if (new_size2 < cols_ && rows_ > 0) { for (vcl_size_t i=0; i to_delete; for (typename std::map::iterator it = stl_sparse_matrix[i].begin(); it != stl_sparse_matrix[i].end(); ++it) { if (it->first >= new_size2) to_delete.push_back(it->first); } for (std::list::iterator it = to_delete.begin(); it != to_delete.end(); ++it) stl_sparse_matrix[i].erase(*it); } //std::cout << "Cropping done..." << std::endl; } rows_ = new_size1; cols_ = new_size2; viennacl::copy(stl_sparse_matrix, *this); } rows_ = new_size1; cols_ = new_size2; } /** @brief Returns the number of rows */ vcl_size_t size1() const { return rows_; } /** @brief Returns the number of columns */ vcl_size_t size2() const { return cols_; } /** @brief Returns the number of nonzero entries */ vcl_size_t nnz() const { return nonzeros_; } /** @brief Returns the number of internal nonzero entries (nnz() rounded up to a multiple of ALIGNMENT) */ vcl_size_t internal_nnz() const { return viennacl::tools::align_to_multiple(nonzeros_, ALIGNMENT); } /** @brief Returns the memory handle to the (row, column) index array */ const handle_type & handle12() const { return coord_buffer_; } /** @brief Returns the memory handle to the matrix entry array */ const handle_type & handle() const { return elements_; } /** @brief Returns the memory handle to the group start index array */ const handle_type & handle3() const { return group_boundaries_; } /** @brief Returns the stored group count (group_num_) */ vcl_size_t groups() const { return group_num_; } #if defined(_MSC_VER) && _MSC_VER < 1500 //Visual Studio 2005 needs special treatment template friend void copy(const CPU_MATRIX & 
cpu_matrix, coordinate_matrix & gpu_matrix ); #else template friend void copy(const CPU_MATRIX & cpu_matrix, coordinate_matrix & gpu_matrix ); #endif private: /** @brief Copy constructor is by now not available. */ coordinate_matrix(coordinate_matrix const &); /** @brief Assignment is by now not available. */ coordinate_matrix & operator=(coordinate_matrix const &); vcl_size_t rows_; vcl_size_t cols_; vcl_size_t nonzeros_; vcl_size_t group_num_; handle_type coord_buffer_; handle_type elements_; handle_type group_boundaries_; }; // // Specify available operations: // /** \cond */ namespace linalg { namespace detail { // x = A * y template struct op_executor, op_assign, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { // check for the special case x = A * x if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs = temp; } else viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs); } }; template struct op_executor, op_inplace_add, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs += temp; } }; template struct op_executor, op_inplace_sub, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs -= temp; } }; // x = A * vec_op template struct op_executor, op_assign, vector_expression, const vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs(), viennacl::traits::context(rhs)); 
viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs); } }; // x += A * vec_op template struct op_executor, op_inplace_add, vector_expression, const vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs(), viennacl::traits::context(rhs)); viennacl::vector temp_result(lhs); viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); lhs += temp_result; } }; // x -= A * vec_op template struct op_executor, op_inplace_sub, vector_expression, const vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs(), viennacl::traits::context(rhs)); viennacl::vector temp_result(lhs); viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); lhs -= temp_result; } }; } // namespace detail } // namespace linalg /** \endcond */ } #endif ViennaCL-1.5.1-src/viennacl/forwards.h000644 001750 001750 00000071746 12267307531 017604 0ustar00rupprupp000000 000000 #ifndef VIENNACL_FORWARDS_H #define VIENNACL_FORWARDS_H /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/forwards.h @brief This file provides the forward declarations for the main types used within ViennaCL */ /** @mainpage Source Code Documentation for ViennaCL 1.5.1 This is the source code documentation of ViennaCL. 
Detailed information about the functions in ViennaCL can be found here. For a general overview over the types and functionality provided by ViennaCL, please refer to the file doc/viennacl.pdf */ //compatibility defines: #ifdef VIENNACL_HAVE_UBLAS #define VIENNACL_WITH_UBLAS #endif #ifdef VIENNACL_HAVE_EIGEN #define VIENNACL_WITH_EIGEN #endif #ifdef VIENNACL_HAVE_MTL4 #define VIENNACL_WITH_MTL4 #endif #include #include #include #include "viennacl/meta/enable_if.hpp" /** @brief Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them. */ namespace viennacl { typedef std::size_t vcl_size_t; typedef std::ptrdiff_t vcl_ptrdiff_t; /** @brief A tag class representing assignment */ struct op_assign {}; /** @brief A tag class representing inplace addition */ struct op_inplace_add {}; /** @brief A tag class representing inplace subtraction */ struct op_inplace_sub {}; /** @brief A tag class representing addition */ struct op_add {}; /** @brief A tag class representing subtraction */ struct op_sub {}; /** @brief A tag class representing multiplication by a scalar */ struct op_mult {}; /** @brief A tag class representing matrix-vector products and element-wise multiplications*/ struct op_prod {}; /** @brief A tag class representing matrix-matrix products */ struct op_mat_mat_prod {}; /** @brief A tag class representing division */ struct op_div {}; /** @brief A tag class representing the power function */ struct op_pow {}; /** @brief A tag class representing element-wise binary operations (like multiplication) on vectors or matrices */ template struct op_element_binary {}; /** @brief A tag class representing element-wise unary operations (like sin()) on vectors or matrices */ template struct op_element_unary {}; /** @brief A tag class representing the modulus function for integers */ struct op_abs {}; /** @brief A tag class representing the acos() function */ struct op_acos {}; /** @brief A tag class representing the 
asin() function */ struct op_asin {}; /** @brief A tag class representing the atan() function */ struct op_atan {}; /** @brief A tag class representing the atan2() function */ struct op_atan2 {}; /** @brief A tag class representing the ceil() function */ struct op_ceil {}; /** @brief A tag class representing the cos() function */ struct op_cos {}; /** @brief A tag class representing the cosh() function */ struct op_cosh {}; /** @brief A tag class representing the exp() function */ struct op_exp {}; /** @brief A tag class representing the fabs() function */ struct op_fabs {}; /** @brief A tag class representing the fdim() function */ struct op_fdim {}; /** @brief A tag class representing the floor() function */ struct op_floor {}; /** @brief A tag class representing the fmax() function */ struct op_fmax {}; /** @brief A tag class representing the fmin() function */ struct op_fmin {}; /** @brief A tag class representing the fmod() function */ struct op_fmod {}; /** @brief A tag class representing the log() function */ struct op_log {}; /** @brief A tag class representing the log10() function */ struct op_log10 {}; /** @brief A tag class representing the sin() function */ struct op_sin {}; /** @brief A tag class representing the sinh() function */ struct op_sinh {}; /** @brief A tag class representing the sqrt() function */ struct op_sqrt {}; /** @brief A tag class representing the tan() function */ struct op_tan {}; /** @brief A tag class representing the tanh() function */ struct op_tanh {}; /** @brief A tag class representing the (off-)diagonal of a matrix */ struct op_matrix_diag {}; /** @brief A tag class representing a matrix given by a vector placed on a certain (off-)diagonal */ struct op_vector_diag {}; /** @brief A tag class representing the extraction of a matrix row to a vector */ struct op_row {}; /** @brief A tag class representing the extraction of a matrix column to a vector */ struct op_column {}; /** @brief A tag class representing inner products of 
two vectors */ struct op_inner_prod {}; /** @brief A tag class representing the 1-norm of a vector */ struct op_norm_1 {}; /** @brief A tag class representing the 2-norm of a vector */ struct op_norm_2 {}; /** @brief A tag class representing the inf-norm of a vector */ struct op_norm_inf {}; /** @brief A tag class representing the Frobenius-norm of a matrix */ struct op_norm_frobenius {}; /** @brief A tag class representing transposed matrices */ struct op_trans {}; /** @brief A tag class representing sign flips (for scalars only. Vectors and matrices use the standard multiplication by the scalar -1.0) */ struct op_flip_sign {}; //forward declaration of basic types: template class scalar; template class scalar_expression; template class entry_proxy; template class vector_expression; template class vector_iterator; template class const_vector_iterator; template class implicit_vector_base; template class zero_vector; template class unit_vector; template class one_vector; template class scalar_vector; template class vector_base; template class vector; template class vector_tuple; //the following forwards are needed for GMRES template void copy(CPU_ITERATOR const & cpu_begin, CPU_ITERATOR const & cpu_end, vector_iterator gpu_begin); template void copy(const_vector_iterator const & gpu_src_begin, const_vector_iterator const & gpu_src_end, vector_iterator gpu_dest_begin); template void copy(const_vector_iterator const & gpu_src_begin, const_vector_iterator const & gpu_src_end, const_vector_iterator gpu_dest_begin); template void fast_copy(const const_vector_iterator & gpu_begin, const const_vector_iterator & gpu_end, CPU_ITERATOR cpu_begin ); template void fast_copy(CPU_ITERATOR const & cpu_begin, CPU_ITERATOR const & cpu_end, vector_iterator gpu_begin); /** @brief Tag class for indicating row-major layout of a matrix. Not passed to the matrix directly, see row_major type. */ struct row_major_tag {}; /** @brief Tag class for indicating column-major layout of a matrix. 
Not passed to the matrix directly, see row_major type. */ struct column_major_tag {}; /** @brief A tag for row-major storage of a dense matrix. */ struct row_major { typedef row_major_tag orientation_category; /** @brief Returns the memory offset for entry (i,j) of a dense matrix. * * @param i row index * @param j column index * @param num_cols number of columns, i.e. entries per row (including alignment) */ static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t /* num_rows */, vcl_size_t num_cols) { return i * num_cols + j; } }; /** @brief A tag for column-major storage of a dense matrix. */ struct column_major { typedef column_major_tag orientation_category; /** @brief Returns the memory offset for entry (i,j) of a dense matrix. * * @param i row index * @param j column index * @param num_rows number of rows, i.e. entries per column (including alignment) */ static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t num_rows, vcl_size_t /* num_cols */) { return i + j * num_rows; } }; struct row_iteration; struct col_iteration; template class matrix_expression; // // Matrix types: // template class matrix_base; template class matrix; template class implicit_matrix_base; template class identity_matrix; template class zero_matrix; template class scalar_matrix; template class compressed_matrix; template class compressed_compressed_matrix; template class coordinate_matrix; template class ell_matrix; template class hyb_matrix; template class circulant_matrix; template class hankel_matrix; template class toeplitz_matrix; template class vandermonde_matrix; // // Proxies: // template class basic_range; typedef basic_range<> range; template class basic_slice; typedef basic_slice<> slice; template class vector_range; template class vector_slice; template class matrix_range; template class matrix_slice; /** @brief Helper struct for checking whether a type is a host scalar type (e.g. 
float, double) */ template struct is_cpu_scalar { enum { value = false }; }; /** @brief Helper struct for checking whether a type is a viennacl::scalar<> */ template struct is_scalar { enum { value = false }; }; /** @brief Helper struct for checking whether a type represents a sign flip on a viennacl::scalar<> */ template struct is_flip_sign_scalar { enum { value = false }; }; /** @brief Helper struct for checking whether the provided type represents a scalar (either host, from ViennaCL, or a flip-sign proxy) */ template struct is_any_scalar { enum { value = (is_scalar::value || is_cpu_scalar::value || is_flip_sign_scalar::value )}; }; /** @brief Checks for a type being either vector_base or implicit_vector_base */ template struct is_any_vector { enum { value = 0 }; }; /** @brief Checks for either matrix_base or implicit_matrix_base */ template struct is_any_dense_matrix { enum { value = 0 }; }; /** @brief Helper class for checking whether a matrix has a row-major layout. */ template struct is_row_major { enum { value = false }; }; /** @brief Helper class for checking whether a matrix is a compressed_matrix (CSR format) */ template struct is_compressed_matrix { enum { value = false }; }; /** @brief Helper class for checking whether a matrix is a coordinate_matrix (COO format) */ template struct is_coordinate_matrix { enum { value = false }; }; /** @brief Helper class for checking whether a matrix is an ell_matrix (ELL format) */ template struct is_ell_matrix { enum { value = false }; }; /** @brief Helper class for checking whether a matrix is a hyb_matrix (hybrid format: ELL plus CSR) */ template struct is_hyb_matrix { enum { value = false }; }; /** @brief Helper class for checking whether the provided type is one of the sparse matrix types (compressed_matrix, coordinate_matrix, etc.) 
*/ template struct is_any_sparse_matrix { enum { value = false }; }; /** @brief Helper class for checking whether a matrix is a circulant matrix */ template struct is_circulant_matrix { enum { value = false }; }; /** @brief Helper class for checking whether a matrix is a Hankel matrix */ template struct is_hankel_matrix { enum { value = false }; }; /** @brief Helper class for checking whether a matrix is a Toeplitz matrix */ template struct is_toeplitz_matrix { enum { value = false }; }; /** @brief Helper class for checking whether a matrix is a Vandermonde matrix */ template struct is_vandermonde_matrix { enum { value = false }; }; /** @brief Helper class for checking whether the provided type is any of the dense structured matrix types (circulant, Hankel, etc.) */ template struct is_any_dense_structured_matrix { enum { value = viennacl::is_circulant_matrix::value || viennacl::is_hankel_matrix::value || viennacl::is_toeplitz_matrix::value || viennacl::is_vandermonde_matrix::value }; }; enum memory_types { MEMORY_NOT_INITIALIZED , MAIN_MEMORY , OPENCL_MEMORY , CUDA_MEMORY }; /** @brief Exception class in case of memory errors */ class memory_exception : public std::exception { public: memory_exception() : message_() {} memory_exception(std::string message) : message_("ViennaCL: Internal memory error: " + message) {} virtual const char* what() const throw() { return message_.c_str(); } virtual ~memory_exception() throw() {} private: std::string message_; }; class cuda_not_available_exception : public std::exception { public: cuda_not_available_exception() : message_("ViennaCL was compiled without CUDA support, but CUDA functionality required for this operation.") {} virtual const char* what() const throw() { return message_.c_str(); } virtual ~cuda_not_available_exception() throw() {} private: std::string message_; }; class context; namespace tools { //helper for matrix row/col iterators //must be specialized for every viennacl matrix type /** @brief Helper class 
for incrementing an iterator in a dense matrix. */ template struct MATRIX_ITERATOR_INCREMENTER { typedef typename MATRIXTYPE::ERROR_SPECIALIZATION_FOR_THIS_MATRIX_TYPE_MISSING ErrorIndicator; static void apply(const MATRIXTYPE & /*mat*/, unsigned int & /*row*/, unsigned int & /*col*/) {} }; } namespace linalg { #if !defined(_MSC_VER) || defined(__CUDACC__) template void convolve_i(viennacl::vector& input1, viennacl::vector& input2, viennacl::vector& output); template viennacl::vector_expression, const vector_base, op_element_binary > element_prod(vector_base const & v1, vector_base const & v2); template viennacl::vector_expression, const vector_base, op_element_binary > element_div(vector_base const & v1, vector_base const & v2); template void inner_prod_impl(vector_base const & vec1, vector_base const & vec2, scalar & result); template void inner_prod_impl(viennacl::vector_expression const & vec1, vector_base const & vec2, scalar & result); template void inner_prod_impl(vector_base const & vec1, viennacl::vector_expression const & vec2, scalar & result); template void inner_prod_impl(viennacl::vector_expression const & vec1, viennacl::vector_expression const & vec2, scalar & result); /////////////////////////// template void inner_prod_cpu(vector_base const & vec1, vector_base const & vec2, T & result); template void inner_prod_cpu(viennacl::vector_expression const & vec1, vector_base const & vec2, T & result); template void inner_prod_cpu(vector_base const & vec1, viennacl::vector_expression const & vec2, T & result); template void inner_prod_cpu(viennacl::vector_expression const & vec1, viennacl::vector_expression const & vec2, S3 & result); //forward definition of norm_1_impl function template void norm_1_impl(vector_base const & vec, scalar & result); //template //void norm_1_impl(matrix_base const & A, scalar & result); template void norm_1_impl(viennacl::vector_expression const & vec, scalar & result); template void norm_1_cpu(vector_base const & vec, T & 
result); //template //void norm_1_cpu(matrix_base const & vec, // T & result); template void norm_1_cpu(viennacl::vector_expression const & vec, S2 & result); //forward definition of norm_2_impl function template void norm_2_impl(vector_base const & vec, scalar & result); template void norm_2_impl(viennacl::vector_expression const & vec, scalar & result); template void norm_2_cpu(vector_base const & vec, T & result); template void norm_2_cpu(viennacl::vector_expression const & vec, S2 & result); //forward definition of norm_inf_impl function template void norm_inf_impl(vector_base const & vec, scalar & result); //template //void norm_inf_impl(matrix_base const & vec, scalar & result); template void norm_inf_impl(viennacl::vector_expression const & vec, scalar & result); template void norm_inf_cpu(vector_base const & vec, T & result); //template //void norm_inf_cpu(matrix_base const & vec, T & result); template void norm_inf_cpu(viennacl::vector_expression const & vec, S2 & result); template void norm_frobenius_impl(matrix_base const & vec, scalar & result); template void norm_frobenius_cpu(matrix_base const & vec, T & result); template vcl_size_t index_norm_inf(vector_base const & vec); template vcl_size_t index_norm_inf(viennacl::vector_expression const & vec); //forward definition of prod_impl functions template void prod_impl(const matrix_base & mat, const vector_base & vec, vector_base & result); template void prod_impl(const matrix_expression< const matrix_base, const matrix_base, op_trans> & mat_trans, const vector_base & vec, vector_base & result); template typename viennacl::enable_if< viennacl::is_any_sparse_matrix::value, vector_expression, op_prod > >::type prod_impl(const SparseMatrixType & mat, const vector & vec); #endif namespace detail { enum row_info_types { SPARSE_ROW_NORM_INF = 0, SPARSE_ROW_NORM_1, SPARSE_ROW_NORM_2, SPARSE_ROW_DIAGONAL }; } /** @brief A tag class representing a lower triangular matrix */ struct lower_tag { static const char * 
name() { return "lower"; } }; //lower triangular matrix /** @brief A tag class representing an upper triangular matrix */ struct upper_tag { static const char * name() { return "upper"; } }; //upper triangular matrix /** @brief A tag class representing a lower triangular matrix with unit diagonal*/ struct unit_lower_tag { static const char * name() { return "unit_lower"; } }; //unit lower triangular matrix /** @brief A tag class representing an upper triangular matrix with unit diagonal*/ struct unit_upper_tag { static const char * name() { return "unit_upper"; } }; //unit upper triangular matrix //preconditioner tags class ilut_tag; /** @brief A tag class representing the use of no preconditioner */ class no_precond { public: template void apply(VectorType &) const {} }; } //namespace linalg // // More namespace comments to follow: // /** @brief Namespace providing routines for handling the different memory domains. */ namespace backend { /** @brief Provides implementations for handling memory buffers in CPU RAM. */ namespace cpu_ram { /** @brief Holds implementation details for handling memory buffers in CPU RAM. Not intended for direct use by library users. */ namespace detail {} } /** @brief Provides implementations for handling CUDA memory buffers. */ namespace cuda { /** @brief Holds implementation details for handling CUDA memory buffers. Not intended for direct use by library users. */ namespace detail {} } /** @brief Implementation details for the generic memory backend interface. */ namespace detail {} /** @brief Provides implementations for handling OpenCL memory buffers. */ namespace opencl { /** @brief Holds implementation details for handling OpenCL memory buffers. Not intended for direct use by library users. */ namespace detail {} } } /** @brief Holds implementation details for functionality in the main viennacl-namespace. Not intended for direct use by library users. */ namespace detail { /** @brief Helper namespace for fast Fourier transforms. 
Not to be used directly by library users. */ namespace fft { /** @brief Helper namespace for fast-Fourier transformation. Deprecated. */ namespace FFT_DATA_ORDER {} } } /** @brief Provides an OpenCL kernel generator. */ namespace generator { /** @brief Provides the implementation for tuning the kernels for a particular device. */ namespace autotune {} /** @brief Contains implementation details of the kernel generator. */ namespace detail {} /** @brief Namespace holding the various device-specific parameters for generating the best kernels. */ namespace profiles {} /** @brief Contains various helper routines for kernel generation. */ namespace utils {} } /** @brief Provides basic input-output functionality. */ namespace io { /** @brief Implementation details for IO functionality. Usually not of interest for a library user. */ namespace detail {} /** @brief Namespace holding the various XML tag definitions for the kernel parameter tuning facility. */ namespace tag {} /** @brief Namespace holding the various XML strings for the kernel parameter tuning facility. */ namespace val {} } /** @brief Provides all linear algebra operations which are not covered by operator overloads. */ namespace linalg { /** @brief Holds all CUDA compute kernels used by ViennaCL. */ namespace cuda { /** @brief Helper functions for the CUDA linear algebra backend. */ namespace detail {} } /** @brief Namespace holding implementation details for linear algebra routines. Usually not of interest for a library user. */ namespace detail { /** @brief Implementation namespace for algebraic multigrid preconditioner. */ namespace amg {} /** @brief Implementation namespace for sparse approximate inverse preconditioner. */ namespace spai {} } /** @brief Holds all compute kernels with conventional host-based execution (buffers in CPU RAM). */ namespace host_based { /** @brief Helper functions for the host-based linear algebra backend. 
*/ namespace detail {} } /** @brief Namespace containing the OpenCL kernels. Deprecated, will be moved to viennacl::linalg::opencl in future releases. */ namespace kernels {} /** @brief Holds all routines providing OpenCL linear algebra operations. */ namespace opencl { /** @brief Helper functions for OpenCL-accelerated linear algebra operations. */ namespace detail {} /** @brief Contains the OpenCL kernel generation functions for a predefined set of functionality. */ namespace kernels { /** @brief Implementation details for the predefined OpenCL kernels. */ namespace detail {} } } } /** @brief OpenCL backend. Manages platforms, contexts, buffers, kernels, etc. */ namespace ocl {} /** @brief Namespace containing many meta-functions. */ namespace result_of {} /** @brief Namespace for various tools used within ViennaCL. */ namespace tools { /** @brief Contains implementation details for the tools. Usually not of interest for the library user. */ namespace detail {} } /** @brief Namespace providing traits-information as well as generic wrappers to common routines for vectors and matrices such as size() or clear() */ namespace traits {} /** @brief Contains the scheduling functionality which allows for dynamic kernel generation as well as the fusion of multiple statements into a single kernel. */ namespace scheduler { /** @brief Implementation details for the scheduler */ namespace detail {} /** @brief Helper metafunctions used for the scheduler */ namespace result_of {} } } //namespace viennacl #endif /*@}*/ ViennaCL-1.5.1-src/viennacl/compressed_matrix.hpp000644 001750 001750 00000116017 12267307531 022034 0ustar00rupprupp000000 000000 #ifndef VIENNACL_COMPRESSED_MATRIX_HPP_ #define VIENNACL_COMPRESSED_MATRIX_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/compressed_matrix.hpp @brief Implementation of the compressed_matrix class */ #include #include #include #include "viennacl/forwards.h" #include "viennacl/vector.hpp" #include "viennacl/linalg/sparse_matrix_operations.hpp" #include "viennacl/tools/tools.hpp" #include "viennacl/tools/entry_proxy.hpp" namespace viennacl { namespace detail { template void copy_impl(const CPU_MATRIX & cpu_matrix, compressed_matrix & gpu_matrix, vcl_size_t nonzeros) { assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); viennacl::backend::typesafe_host_array row_buffer(gpu_matrix.handle1(), cpu_matrix.size1() + 1); viennacl::backend::typesafe_host_array col_buffer(gpu_matrix.handle2(), nonzeros); std::vector elements(nonzeros); vcl_size_t row_index = 0; vcl_size_t data_index = 0; for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) { row_buffer.set(row_index, data_index); ++row_index; for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) { col_buffer.set(data_index, col_it.index2()); elements[data_index] = *col_it; ++data_index; } data_index = viennacl::tools::align_to_multiple(data_index, ALIGNMENT); //take care of alignment } row_buffer.set(row_index, data_index); gpu_matrix.set(row_buffer.get(), col_buffer.get(), &elements[0], cpu_matrix.size1(), cpu_matrix.size2(), nonzeros); } } //provide copy-operation: /** @brief Copies a sparse matrix from the 
host to the OpenCL device (either GPU or multi-core CPU) * * There are some type requirements on the CPU_MATRIX type (fulfilled by e.g. boost::numeric::ublas): * - .size1() returns the number of rows * - .size2() returns the number of columns * - const_iterator1 is a type definition for an iterator along increasing row indices * - const_iterator2 is a type definition for an iterator along increasing columns indices * - The const_iterator1 type provides an iterator of type const_iterator2 via members .begin() and .end() that iterates along column indices in the current row. * - The types const_iterator1 and const_iterator2 provide members functions .index1() and .index2() that return the current row and column indices respectively. * - Dereferenciation of an object of type const_iterator2 returns the entry. * * @param cpu_matrix A sparse matrix on the host. * @param gpu_matrix A compressed_matrix from ViennaCL */ template void copy(const CPU_MATRIX & cpu_matrix, compressed_matrix & gpu_matrix ) { if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 ) { //determine nonzeros: vcl_size_t num_entries = 0; for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) { vcl_size_t entries_per_row = 0; for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) { ++entries_per_row; } num_entries += viennacl::tools::align_to_multiple(entries_per_row, ALIGNMENT); } if (num_entries == 0) //we copy an empty matrix num_entries = 1; //set up matrix entries: viennacl::detail::copy_impl(cpu_matrix, gpu_matrix, num_entries); } } //adapted for std::vector< std::map < > > argument: /** @brief Copies a sparse square matrix in the std::vector< std::map < > > format to an OpenCL device. Use viennacl::tools::sparse_matrix_adapter for non-square matrices. 
* * @param cpu_matrix A sparse square matrix on the host using STL types * @param gpu_matrix A compressed_matrix from ViennaCL */ template void copy(const std::vector< std::map > & cpu_matrix, compressed_matrix & gpu_matrix ) { vcl_size_t nonzeros = 0; vcl_size_t max_col = 0; for (vcl_size_t i=0; i 0) nonzeros += ((cpu_matrix[i].size() - 1) / ALIGNMENT + 1) * ALIGNMENT; if (cpu_matrix[i].size() > 0) max_col = std::max(max_col, (cpu_matrix[i].rbegin())->first); } viennacl::detail::copy_impl(tools::const_sparse_matrix_adapter(cpu_matrix, cpu_matrix.size(), max_col + 1), gpu_matrix, nonzeros); } #ifdef VIENNACL_WITH_UBLAS template void copy(const boost::numeric::ublas::compressed_matrix & ublas_matrix, viennacl::compressed_matrix & gpu_matrix) { assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(ublas_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(ublas_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); //we just need to copy the CSR arrays: viennacl::backend::typesafe_host_array row_buffer(gpu_matrix.handle1(), ublas_matrix.size1() + 1); for (vcl_size_t i=0; i<=ublas_matrix.size1(); ++i) row_buffer.set(i, ublas_matrix.index1_data()[i]); viennacl::backend::typesafe_host_array col_buffer(gpu_matrix.handle2(), ublas_matrix.nnz()); for (vcl_size_t i=0; i void copy(const Eigen::SparseMatrix & eigen_matrix, compressed_matrix & gpu_matrix) { assert( (gpu_matrix.size1() == 0 || static_cast(eigen_matrix.rows()) == gpu_matrix.size1()) && bool("Size mismatch") ); assert( (gpu_matrix.size2() == 0 || static_cast(eigen_matrix.cols()) == gpu_matrix.size2()) && bool("Size mismatch") ); std::vector< std::map > stl_matrix(eigen_matrix.rows()); for (int k=0; k < eigen_matrix.outerSize(); ++k) for (typename Eigen::SparseMatrix::InnerIterator it(eigen_matrix, k); it; ++it) stl_matrix[it.row()][it.col()] = it.value(); copy(tools::const_sparse_matrix_adapter(stl_matrix, eigen_matrix.rows(), 
eigen_matrix.cols()), gpu_matrix); } #endif #ifdef VIENNACL_WITH_MTL4 template void copy(const mtl::compressed2D & cpu_matrix, compressed_matrix & gpu_matrix) { assert( (gpu_matrix.size1() == 0 || static_cast(cpu_matrix.num_rows()) == gpu_matrix.size1()) && bool("Size mismatch") ); assert( (gpu_matrix.size2() == 0 || static_cast(cpu_matrix.num_cols()) == gpu_matrix.size2()) && bool("Size mismatch") ); typedef mtl::compressed2D MatrixType; std::vector< std::map > stl_matrix(cpu_matrix.num_rows()); using mtl::traits::range_generator; using mtl::traits::range::min; // Choose between row and column traversal typedef typename min, range_generator >::type range_type; range_type my_range; // Type of outer cursor typedef typename range_type::type c_type; // Type of inner cursor typedef typename mtl::traits::range_generator::type ic_type; // Define the property maps typename mtl::traits::row::type row(cpu_matrix); typename mtl::traits::col::type col(cpu_matrix); typename mtl::traits::const_value::type value(cpu_matrix); // Now iterate over the matrix for (c_type cursor(my_range.begin(cpu_matrix)), cend(my_range.end(cpu_matrix)); cursor != cend; ++cursor) for (ic_type icursor(mtl::begin(cursor)), icend(mtl::end(cursor)); icursor != icend; ++icursor) stl_matrix[row(*icursor)][col(*icursor)] = value(*icursor); copy(tools::const_sparse_matrix_adapter(stl_matrix, cpu_matrix.num_rows(), cpu_matrix.num_cols()), gpu_matrix); } #endif // // gpu to cpu: // /** @brief Copies a sparse matrix from the OpenCL device (either GPU or multi-core CPU) to the host. * * There are two type requirements on the CPU_MATRIX type (fulfilled by e.g. boost::numeric::ublas): * - resize(rows, cols) A resize function to bring the matrix into the correct size * - operator(i,j) Write new entries via the parenthesis operator * * @param gpu_matrix A compressed_matrix from ViennaCL * @param cpu_matrix A sparse matrix on the host. 
*/ template void copy(const compressed_matrix & gpu_matrix, CPU_MATRIX & cpu_matrix ) { assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 ) { //get raw data from memory: viennacl::backend::typesafe_host_array row_buffer(gpu_matrix.handle1(), cpu_matrix.size1() + 1); viennacl::backend::typesafe_host_array col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz()); std::vector elements(gpu_matrix.nnz()); //std::cout << "GPU->CPU, nonzeros: " << gpu_matrix.nnz() << std::endl; viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get()); viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get()); viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE)* gpu_matrix.nnz(), &(elements[0])); //fill the cpu_matrix: vcl_size_t data_index = 0; for (vcl_size_t row = 1; row <= gpu_matrix.size1(); ++row) { while (data_index < row_buffer[row]) { if (col_buffer[data_index] >= gpu_matrix.size2()) { std::cerr << "ViennaCL encountered invalid data at colbuffer[" << data_index << "]: " << col_buffer[data_index] << std::endl; return; } if (elements[data_index] != static_cast(0.0)) cpu_matrix(row-1, static_cast(col_buffer[data_index])) = elements[data_index]; ++data_index; } } } } /** @brief Copies a sparse matrix from an OpenCL device to the host. The host type is the std::vector< std::map < > > format . * * @param gpu_matrix A compressed_matrix from ViennaCL * @param cpu_matrix A sparse matrix on the host. 
*/ template void copy(const compressed_matrix & gpu_matrix, std::vector< std::map > & cpu_matrix) { tools::sparse_matrix_adapter temp(cpu_matrix, cpu_matrix.size(), cpu_matrix.size()); copy(gpu_matrix, temp); } #ifdef VIENNACL_WITH_UBLAS template void copy(viennacl::compressed_matrix const & gpu_matrix, boost::numeric::ublas::compressed_matrix & ublas_matrix) { assert( (viennacl::traits::size1(ublas_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); assert( (viennacl::traits::size2(ublas_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); viennacl::backend::typesafe_host_array row_buffer(gpu_matrix.handle1(), gpu_matrix.size1() + 1); viennacl::backend::typesafe_host_array col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz()); viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get()); viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get()); ublas_matrix.clear(); ublas_matrix.reserve(gpu_matrix.nnz()); ublas_matrix.set_filled(gpu_matrix.size1() + 1, gpu_matrix.nnz()); for (vcl_size_t i=0; i void copy(compressed_matrix & gpu_matrix, Eigen::SparseMatrix & eigen_matrix) { assert( (static_cast(eigen_matrix.rows()) == gpu_matrix.size1()) && bool("Size mismatch") ); assert( (static_cast(eigen_matrix.cols()) == gpu_matrix.size2()) && bool("Size mismatch") ); if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 ) { //get raw data from memory: viennacl::backend::typesafe_host_array row_buffer(gpu_matrix.handle1(), gpu_matrix.size1() + 1); viennacl::backend::typesafe_host_array col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz()); std::vector elements(gpu_matrix.nnz()); viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get()); viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get()); viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE)* gpu_matrix.nnz(), &(elements[0])); 
eigen_matrix.setZero(); vcl_size_t data_index = 0; for (vcl_size_t row = 1; row <= gpu_matrix.size1(); ++row) { while (data_index < row_buffer[row]) { assert(col_buffer[data_index] < gpu_matrix.size2() && bool("ViennaCL encountered invalid data at col_buffer")); if (elements[data_index] != static_cast(0.0)) eigen_matrix.insert(row-1, col_buffer[data_index]) = elements[data_index]; ++data_index; } } } } #endif #ifdef VIENNACL_WITH_MTL4 template void copy(compressed_matrix & gpu_matrix, mtl::compressed2D & mtl4_matrix) { assert( (static_cast(mtl4_matrix.num_rows()) == gpu_matrix.size1()) && bool("Size mismatch") ); assert( (static_cast(mtl4_matrix.num_cols()) == gpu_matrix.size2()) && bool("Size mismatch") ); if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 ) { //get raw data from memory: viennacl::backend::typesafe_host_array row_buffer(gpu_matrix.handle1(), gpu_matrix.size1() + 1); viennacl::backend::typesafe_host_array col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz()); std::vector elements(gpu_matrix.nnz()); viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get()); viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get()); viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE)* gpu_matrix.nnz(), &(elements[0])); //set_to_zero(mtl4_matrix); //mtl4_matrix.change_dim(gpu_matrix.size1(), gpu_matrix.size2()); mtl::matrix::inserter< mtl::compressed2D > ins(mtl4_matrix); vcl_size_t data_index = 0; for (vcl_size_t row = 1; row <= gpu_matrix.size1(); ++row) { while (data_index < row_buffer[row]) { assert(col_buffer[data_index] < gpu_matrix.size2() && bool("ViennaCL encountered invalid data at col_buffer")); if (elements[data_index] != static_cast(0.0)) ins(row-1, col_buffer[data_index]) << typename mtl::Collection< mtl::compressed2D >::value_type(elements[data_index]); ++data_index; } } } } #endif //////////////////////// compressed_matrix ////////////////////////// /** 
@brief A sparse square matrix in compressed sparse rows format. * * @tparam SCALARTYPE The floating point type (either float or double, checked at compile time) * @tparam ALIGNMENT The internal memory size for the entries in each row is given by (size()/ALIGNMENT + 1) * ALIGNMENT. ALIGNMENT must be a power of two. Best values or usually 4, 8 or 16, higher values are usually a waste of memory. */ template class compressed_matrix { public: typedef viennacl::backend::mem_handle handle_type; typedef scalar::ResultType> value_type; typedef vcl_size_t size_type; /** @brief Default construction of a compressed matrix. No memory is allocated */ compressed_matrix() : rows_(0), cols_(0), nonzeros_(0) {} /** @brief Construction of a compressed matrix with the supplied number of rows and columns. If the number of nonzeros is positive, memory is allocated * * @param rows Number of rows * @param cols Number of columns * @param nonzeros Optional number of nonzeros for memory preallocation * @param ctx Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host) */ explicit compressed_matrix(vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzeros = 0, viennacl::context ctx = viennacl::context()) : rows_(rows), cols_(cols), nonzeros_(nonzeros) { row_buffer_.switch_active_handle_id(ctx.memory_type()); col_buffer_.switch_active_handle_id(ctx.memory_type()); elements_.switch_active_handle_id(ctx.memory_type()); #ifdef VIENNACL_WITH_OPENCL if (ctx.memory_type() == OPENCL_MEMORY) { row_buffer_.opencl_handle().context(ctx.opencl_context()); col_buffer_.opencl_handle().context(ctx.opencl_context()); elements_.opencl_handle().context(ctx.opencl_context()); } #endif if (rows > 0) { viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array().element_size() * (rows + 1), ctx); } if (nonzeros > 0) { viennacl::backend::memory_create(col_buffer_, viennacl::backend::typesafe_host_array().element_size() * nonzeros, ctx); 
viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE) * nonzeros, ctx); } } /** @brief Construction of a compressed matrix with the supplied number of rows and columns. If the number of nonzeros is positive, memory is allocated * * @param rows Number of rows * @param cols Number of columns * @param ctx Context in which to create the matrix */ explicit compressed_matrix(vcl_size_t rows, vcl_size_t cols, viennacl::context ctx) : rows_(rows), cols_(cols), nonzeros_(0) { row_buffer_.switch_active_handle_id(ctx.memory_type()); col_buffer_.switch_active_handle_id(ctx.memory_type()); elements_.switch_active_handle_id(ctx.memory_type()); #ifdef VIENNACL_WITH_OPENCL if (ctx.memory_type() == OPENCL_MEMORY) { row_buffer_.opencl_handle().context(ctx.opencl_context()); col_buffer_.opencl_handle().context(ctx.opencl_context()); elements_.opencl_handle().context(ctx.opencl_context()); } #endif if (rows > 0) { viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array().element_size() * (rows + 1), ctx); } } explicit compressed_matrix(viennacl::context ctx) : rows_(0), cols_(0), nonzeros_(0) { row_buffer_.switch_active_handle_id(ctx.memory_type()); col_buffer_.switch_active_handle_id(ctx.memory_type()); elements_.switch_active_handle_id(ctx.memory_type()); #ifdef VIENNACL_WITH_OPENCL if (ctx.memory_type() == OPENCL_MEMORY) { row_buffer_.opencl_handle().context(ctx.opencl_context()); col_buffer_.opencl_handle().context(ctx.opencl_context()); elements_.opencl_handle().context(ctx.opencl_context()); } #endif } #ifdef VIENNACL_WITH_OPENCL explicit compressed_matrix(cl_mem mem_row_buffer, cl_mem mem_col_buffer, cl_mem mem_elements, vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzeros) : rows_(rows), cols_(cols), nonzeros_(nonzeros) { row_buffer_.switch_active_handle_id(viennacl::OPENCL_MEMORY); row_buffer_.opencl_handle() = mem_row_buffer; row_buffer_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is 
destroyed. row_buffer_.raw_size(sizeof(cl_uint) * (rows + 1)); col_buffer_.switch_active_handle_id(viennacl::OPENCL_MEMORY); col_buffer_.opencl_handle() = mem_col_buffer; col_buffer_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed. col_buffer_.raw_size(sizeof(cl_uint) * nonzeros); elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY); elements_.opencl_handle() = mem_elements; elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed. elements_.raw_size(sizeof(SCALARTYPE) * nonzeros); } #endif /** @brief Assignment a compressed matrix from possibly another memory domain. */ compressed_matrix & operator=(compressed_matrix const & other) { assert( (rows_ == 0 || rows_ == other.size1()) && bool("Size mismatch") ); assert( (cols_ == 0 || cols_ == other.size2()) && bool("Size mismatch") ); rows_ = other.size1(); cols_ = other.size2(); nonzeros_ = other.nnz(); viennacl::backend::typesafe_memory_copy(other.row_buffer_, row_buffer_); viennacl::backend::typesafe_memory_copy(other.col_buffer_, col_buffer_); viennacl::backend::typesafe_memory_copy(other.elements_, elements_); return *this; } /** @brief Sets the row, column and value arrays of the compressed matrix * * @param row_jumper Pointer to an array holding the indices of the first element of each row (starting with zero). E.g. row_jumper[10] returns the index of the first entry of the 11th row. The array length is 'cols + 1' * @param col_buffer Pointer to an array holding the column index of each entry. The array length is 'nonzeros' * @param elements Pointer to an array holding the entries of the sparse matrix. 
The array length is 'elements' * @param rows Number of rows of the sparse matrix * @param cols Number of columns of the sparse matrix * @param nonzeros Number of nonzeros */ void set(const void * row_jumper, const void * col_buffer, const SCALARTYPE * elements, vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzeros) { assert( (rows > 0) && bool("Error in compressed_matrix::set(): Number of rows must be larger than zero!")); assert( (cols > 0) && bool("Error in compressed_matrix::set(): Number of columns must be larger than zero!")); assert( (nonzeros > 0) && bool("Error in compressed_matrix::set(): Number of nonzeros must be larger than zero!")); //std::cout << "Setting memory: " << cols + 1 << ", " << nonzeros << std::endl; //row_buffer_.switch_active_handle_id(viennacl::backend::OPENCL_MEMORY); viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array(row_buffer_).element_size() * (rows + 1), viennacl::traits::context(row_buffer_), row_jumper); //col_buffer_.switch_active_handle_id(viennacl::backend::OPENCL_MEMORY); viennacl::backend::memory_create(col_buffer_, viennacl::backend::typesafe_host_array(col_buffer_).element_size() * nonzeros, viennacl::traits::context(col_buffer_), col_buffer); //elements_.switch_active_handle_id(viennacl::backend::OPENCL_MEMORY); viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE) * nonzeros, viennacl::traits::context(elements_), elements); nonzeros_ = nonzeros; rows_ = rows; cols_ = cols; } /** @brief Allocate memory for the supplied number of nonzeros in the matrix. Old values are preserved. 
*/ void reserve(vcl_size_t new_nonzeros) { if (new_nonzeros > nonzeros_) { handle_type col_buffer_old; handle_type elements_old; viennacl::backend::memory_shallow_copy(col_buffer_, col_buffer_old); viennacl::backend::memory_shallow_copy(elements_, elements_old); viennacl::backend::typesafe_host_array size_deducer(col_buffer_); viennacl::backend::memory_create(col_buffer_, size_deducer.element_size() * new_nonzeros, viennacl::traits::context(col_buffer_)); viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE) * new_nonzeros, viennacl::traits::context(elements_)); viennacl::backend::memory_copy(col_buffer_old, col_buffer_, 0, 0, size_deducer.element_size() * nonzeros_); viennacl::backend::memory_copy(elements_old, elements_, 0, 0, sizeof(SCALARTYPE)* nonzeros_); nonzeros_ = new_nonzeros; } } /** @brief Resize the matrix. * * @param new_size1 New number of rows * @param new_size2 New number of columns * @param preserve If true, the old values are preserved. At present, old values are always discarded. 
*/ void resize(vcl_size_t new_size1, vcl_size_t new_size2, bool preserve = true) { assert(new_size1 > 0 && new_size2 > 0 && bool("Cannot resize to zero size!")); if (new_size1 != rows_ || new_size2 != cols_) { std::vector > stl_sparse_matrix; if (rows_ > 0) { if (preserve) { stl_sparse_matrix.resize(rows_); viennacl::copy(*this, stl_sparse_matrix); } else stl_sparse_matrix[0][0] = 0; } else { stl_sparse_matrix.resize(new_size1); stl_sparse_matrix[0][0] = 0; //enforces nonzero array sizes if matrix was initially empty } stl_sparse_matrix.resize(new_size1); //discard entries with column index larger than new_size2 if (new_size2 < cols_ && rows_ > 0) { for (vcl_size_t i=0; i to_delete; for (typename std::map::iterator it = stl_sparse_matrix[i].begin(); it != stl_sparse_matrix[i].end(); ++it) { if (it->first >= new_size2) to_delete.push_back(it->first); } for (std::list::iterator it = to_delete.begin(); it != to_delete.end(); ++it) stl_sparse_matrix[i].erase(*it); } } viennacl::copy(stl_sparse_matrix, *this); rows_ = new_size1; cols_ = new_size2; } } /** @brief Returns a reference to the (i,j)-th entry of the sparse matrix. If (i,j) does not exist (zero), it is inserted (slow!) */ entry_proxy operator()(vcl_size_t i, vcl_size_t j) { assert( (i < rows_) && (j < cols_) && bool("compressed_matrix access out of bounds!")); vcl_size_t index = element_index(i, j); // check for element in sparsity pattern if (index < nonzeros_) return entry_proxy(index, elements_); // Element not found. Copying required. Very slow, but direct entry manipulation is painful anyway... 
std::vector< std::map > cpu_backup(rows_); tools::sparse_matrix_adapter adapted_cpu_backup(cpu_backup, rows_, cols_); viennacl::copy(*this, adapted_cpu_backup); cpu_backup[i][static_cast(j)] = 0.0; viennacl::copy(adapted_cpu_backup, *this); index = element_index(i, j); assert(index < nonzeros_); return entry_proxy(index, elements_); } /** @brief Returns the number of rows */ const vcl_size_t & size1() const { return rows_; } /** @brief Returns the number of columns */ const vcl_size_t & size2() const { return cols_; } /** @brief Returns the number of nonzero entries */ const vcl_size_t & nnz() const { return nonzeros_; } /** @brief Returns the OpenCL handle to the row index array */ const handle_type & handle1() const { return row_buffer_; } /** @brief Returns the OpenCL handle to the column index array */ const handle_type & handle2() const { return col_buffer_; } /** @brief Returns the OpenCL handle to the matrix entry array */ const handle_type & handle() const { return elements_; } /** @brief Returns the OpenCL handle to the row index array */ handle_type & handle1() { return row_buffer_; } /** @brief Returns the OpenCL handle to the column index array */ handle_type & handle2() { return col_buffer_; } /** @brief Returns the OpenCL handle to the matrix entry array */ handle_type & handle() { return elements_; } void switch_memory_context(viennacl::context new_ctx) { viennacl::backend::switch_memory_context(row_buffer_, new_ctx); viennacl::backend::switch_memory_context(col_buffer_, new_ctx); viennacl::backend::switch_memory_context(elements_, new_ctx); } viennacl::memory_types memory_context() const { return row_buffer_.get_active_handle_id(); } private: vcl_size_t element_index(vcl_size_t i, vcl_size_t j) { //read row indices viennacl::backend::typesafe_host_array row_indices(row_buffer_, 2); viennacl::backend::memory_read(row_buffer_, row_indices.element_size()*i, row_indices.element_size()*2, row_indices.get()); //get column indices for row i: 
viennacl::backend::typesafe_host_array col_indices(col_buffer_, row_indices[1] - row_indices[0]); viennacl::backend::memory_read(col_buffer_, col_indices.element_size()*row_indices[0], row_indices.element_size()*col_indices.size(), col_indices.get()); //get entries for row i: viennacl::backend::typesafe_host_array row_entries(elements_, row_indices[1] - row_indices[0]); viennacl::backend::memory_read(elements_, sizeof(SCALARTYPE)*row_indices[0], sizeof(SCALARTYPE)*row_entries.size(), row_entries.get()); for (vcl_size_t k=0; k struct op_executor, op_assign, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { // check for the special case x = A * x if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs = temp; } else viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs); } }; template struct op_executor, op_inplace_add, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs += temp; } }; template struct op_executor, op_inplace_sub, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs -= temp; } }; // x = A * vec_op template struct op_executor, op_assign, vector_expression, const vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs(), viennacl::traits::context(rhs)); viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs); } }; // x = A * vec_op template struct op_executor, op_inplace_add, 
vector_expression, vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs(), viennacl::traits::context(rhs)); viennacl::vector temp_result(lhs); viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); lhs += temp_result; } }; // x = A * vec_op template struct op_executor, op_inplace_sub, vector_expression, const vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs(), viennacl::traits::context(rhs)); viennacl::vector temp_result(lhs); viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); lhs -= temp_result; } }; } // namespace detail } // namespace linalg /** \endcond */ } #endif ViennaCL-1.5.1-src/viennacl/vector_proxy.hpp000644 001750 001750 00000024455 12267307531 021053 0ustar00rupprupp000000 000000 #ifndef VIENNACL_VECTOR_PROXY_HPP_ #define VIENNACL_VECTOR_PROXY_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file vector_proxy.hpp @brief Proxy classes for vectors. */ #include "viennacl/forwards.h" #include "viennacl/range.hpp" #include "viennacl/slice.hpp" #include "viennacl/vector.hpp" #include "viennacl/tools/entry_proxy.hpp" namespace viennacl { /** @brief Class for representing non-strided subvectors of a bigger vector x. 
* * In MATLAB notation, this could for example refer to the subvector x(3:8) of a vector x. */ template class vector_range : public vector_base { typedef vector_range self_type; typedef vector_base base_type; public: typedef typename VectorType::value_type value_type; typedef range::size_type size_type; typedef range::difference_type difference_type; typedef value_type reference; typedef const value_type & const_reference; typedef typename VectorType::const_iterator const_iterator; typedef typename VectorType::iterator iterator; typedef typename VectorType::cpu_value_type cpu_value_type; static const int alignment = VectorType::alignment; vector_range(VectorType & v, range const & entry_range) : base_type(v.handle(), entry_range.size(), v.start() + v.stride() * entry_range.start(), v.stride()) {} using base_type::operator=; }; ///////////////////////////////////////////////////////////// ///////////////////////// CPU to GPU //////////////////////// ///////////////////////////////////////////////////////////// template void copy(const VectorType & cpu_vector, vector_range > & gpu_vector_range ) { assert(cpu_vector.end() - cpu_vector.begin() >= 0 && bool("Range must have nonnegative length!")); if (cpu_vector.end() - cpu_vector.begin() > 0) { //we require that the size of the gpu_vector is larger or equal to the cpu-size std::vector temp_buffer(cpu_vector.end() - cpu_vector.begin()); std::copy(cpu_vector.begin(), cpu_vector.end(), temp_buffer.begin()); viennacl::backend::memory_write(gpu_vector_range.handle(), sizeof(SCALARTYPE)*gpu_vector_range.start(), sizeof(SCALARTYPE)*temp_buffer.size(), &(temp_buffer[0])); } } /** @brief Transfer from a cpu vector to a gpu vector. Convenience wrapper for viennacl::linalg::fast_copy(cpu_vec.begin(), cpu_vec.end(), gpu_vec.begin()); * * @param cpu_vec A cpu vector. Type requirements: Iterator can be obtained via member function .begin() and .end() * @param gpu_vec The gpu vector. 
*/ template void fast_copy(const CPUVECTOR & cpu_vec, vector_range & gpu_vec) { viennacl::fast_copy(cpu_vec.begin(), cpu_vec.end(), gpu_vec.begin()); } ///////////////////////////////////////////////////////////// ///////////////////////// GPU to CPU //////////////////////// ///////////////////////////////////////////////////////////// template void copy(vector_range > const & gpu_vector_range, VectorType & cpu_vector) { assert(cpu_vector.end() - cpu_vector.begin() >= 0 && bool("Range must have nonnegative length!")); if (cpu_vector.end() > cpu_vector.begin()) { std::vector temp_buffer(cpu_vector.end() - cpu_vector.begin()); viennacl::backend::memory_read(gpu_vector_range.handle(), sizeof(SCALARTYPE)*gpu_vector_range.start(), sizeof(SCALARTYPE)*temp_buffer.size(), &(temp_buffer[0])); //now copy entries to cpu_vec: std::copy(temp_buffer.begin(), temp_buffer.end(), cpu_vector.begin()); } } /** @brief Transfer from a GPU vector range to a CPU vector. Convenience wrapper for viennacl::linalg::fast_copy(gpu_vec.begin(), gpu_vec.end(), cpu_vec.begin()); * * @param gpu_vec A gpu vector range. * @param cpu_vec The cpu vector. Type requirements: Output iterator can be obtained via member function .begin() */ template void fast_copy(vector_range< VectorType > const & gpu_vec, CPUVECTOR & cpu_vec ) { viennacl::fast_copy(gpu_vec.begin(), gpu_vec.end(), cpu_vec.begin()); } // // Convenience function // template vector_range project(VectorType & vec, viennacl::range const & r1) { return vector_range(vec, r1); } template vector_range project(viennacl::vector_range & vec, viennacl::range const & r1) { assert(r1.size() <= vec.size() && bool("Size of range invalid!")); return vector_range(vec, viennacl::range(vec.start() + r1.start(), vec.start() + r1.start() + r1.size())); } // // // /////////////////////////////// Slice ///////////////////////////////////////////// // // // /** @brief Class for representing strided subvectors of a bigger vector x. 
* * In MATLAB notation, this could for example refer to the subvector x(3:2:8) of a vector x. */ template class vector_slice : public vector_base { typedef vector_slice self_type; typedef vector_base base_type; public: typedef typename VectorType::value_type value_type; typedef slice::size_type size_type; typedef slice::difference_type difference_type; typedef value_type reference; typedef const value_type & const_reference; typedef typename VectorType::const_iterator const_iterator; typedef typename VectorType::iterator iterator; typedef typename VectorType::cpu_value_type cpu_value_type; static const int alignment = VectorType::alignment; vector_slice(VectorType & v, slice const & entry_slice) : base_type(v.handle(), entry_slice.size(), v.start() + v.stride() * entry_slice.start(), v.stride() * entry_slice.stride()) {} using base_type::operator=; }; ///////////////////////////////////////////////////////////// ///////////////////////// CPU to GPU //////////////////////// ///////////////////////////////////////////////////////////// template void copy(const VectorType & cpu_vector, vector_slice > & gpu_vector_slice ) { if (cpu_vector.size() > 0) { std::vector temp_buffer(gpu_vector_slice.stride() * gpu_vector_slice.size()); viennacl::backend::memory_read(gpu_vector_slice.handle(), sizeof(SCALARTYPE)*gpu_vector_slice.start(), sizeof(SCALARTYPE)*temp_buffer.size(), &(temp_buffer[0])); for (vcl_size_t i=0; i void copy(vector_slice > const & gpu_vector_slice, VectorType & cpu_vector) { assert(gpu_vector_slice.end() - gpu_vector_slice.begin() >= 0 && bool("Range must have nonnegative length!")); if (gpu_vector_slice.end() - gpu_vector_slice.begin() > 0) { std::vector temp_buffer(gpu_vector_slice.stride() * gpu_vector_slice.size()); viennacl::backend::memory_read(gpu_vector_slice.handle(), sizeof(SCALARTYPE)*gpu_vector_slice.start(), sizeof(SCALARTYPE)*temp_buffer.size(), &(temp_buffer[0])); for (vcl_size_t i=0; i vector_slice project(VectorType & vec, viennacl::slice 
const & s1) { assert(s1.size() <= vec.size() && bool("Size of slice larger than vector size!")); return vector_slice(vec, s1); } template vector_slice project(viennacl::vector_slice & vec, viennacl::slice const & s1) { assert(s1.size() <= vec.size() && bool("Size of slice larger than vector proxy!")); return vector_slice(vec, viennacl::slice(vec.start() + s1.start(), vec.stride() * s1.stride(), s1.size())); } // interaction with range and vector_range: template vector_slice project(viennacl::vector_slice & vec, viennacl::range const & r1) { assert(r1.size() <= vec.size() && bool("Size of slice larger than vector proxy!")); return vector_slice(vec, viennacl::slice(vec.start() + r1.start(), vec.stride(), r1.size())); } template vector_slice project(viennacl::vector_range & vec, viennacl::slice const & s1) { assert(s1.size() <= vec.size() && bool("Size of slice larger than vector proxy!")); return vector_slice(vec, viennacl::range(vec.start() + s1.start(), s1.stride(), s1.size())); } } #endif ViennaCL-1.5.1-src/viennacl/hankel_matrix.hpp000644 001750 001750 00000031732 12267307531 021132 0ustar00rupprupp000000 000000 #ifndef VIENNACL_HANKEL_MATRIX_HPP #define VIENNACL_HANKEL_MATRIX_HPP /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file hankel_matrix.hpp @brief Implementation of the hankel_matrix class for efficient manipulation of Hankel matrices. Experimental. 
*/

#include "viennacl/forwards.h"
#include "viennacl/vector.hpp"
#include "viennacl/ocl/backend.hpp"

#include "viennacl/toeplitz_matrix.hpp"
#include "viennacl/fft.hpp"

#include "viennacl/linalg/hankel_matrix_operations.hpp"

namespace viennacl {
    /** @brief A Hankel matrix class
    *
    * All data is held by an internal toeplitz_matrix (see elements()); entry (row, col) of the
    * Hankel matrix is stored at entry (size1() - row - 1, col) of that matrix.
    *
    * NOTE(review): template parameter lists and template arguments ("<...>") are missing in
    * this copy of the file; the code tokens below are kept exactly as found.
    *
    * @tparam SCALARTYPE  The underlying scalar type (either float or double)
    * @tparam ALIGNMENT   The internal memory size is given by (size()/ALIGNMENT + 1) * ALIGNMENT. ALIGNMENT must be a power of two. Best values are usually 4, 8 or 16, higher values are usually a waste of memory.
    */
    template
    class hankel_matrix
    {
      public:
        typedef viennacl::backend::mem_handle                                                              handle_type;
        typedef scalar::ResultType>   value_type;

        /**
         * @brief The default constructor. Does not allocate any memory.
         *
         */
        explicit hankel_matrix() {}

        /**
         * @brief         Creates the matrix with the given size
         *
         * Both values are forwarded to the internal Toeplitz matrix. In debug builds an
         * assertion checks that rows == cols.
         *
         * @param rows      Number of rows of the matrix
         * @param cols      Number of columns of the matrix
         */
        explicit hankel_matrix(vcl_size_t rows, vcl_size_t cols) : elements_(rows, cols)
        {
          assert(rows == cols && bool("Hankel matrix must be square!"));
          (void)cols;  // avoid 'unused parameter' warning in optimized builds
        }

        /** @brief Resizes the matrix.
        *   Existing entries can be preserved
        *
        * @param sz         New size of matrix
        * @param preserve   If true, existing values are preserved.
        */
        void resize(vcl_size_t sz, bool preserve = true)
        {
            elements_.resize(sz, preserve);
        }

        /** @brief Returns the memory handle of the internal Toeplitz matrix
        *
        *   @return Handle to the device buffer (viennacl::backend::mem_handle)
        */
        handle_type const & handle() const { return elements_.handle(); }

        /**
         * @brief Returns the internal viennacl::toeplitz_matrix which stores the elements of the Hankel matrix
         *
         */
        toeplitz_matrix & elements() { return elements_; }
        /** @brief Read-only overload of elements() */
        toeplitz_matrix const & elements() const { return elements_; }

        /**
         * @brief Returns the number of rows of the matrix
         */
        vcl_size_t size1() const { return elements_.size1(); }

        /**
         * @brief Returns the number of columns of the matrix
         */
        vcl_size_t size2() const { return elements_.size2(); }

        /** @brief Returns the internal size of the matrix representation.
        *   Usually required for launching OpenCL kernels only
        *
        *   @return Internal size of matrix representation
        */
        vcl_size_t internal_size() const { return elements_.internal_size(); }

        /**
         * @brief Read-write access to an element of the matrix
         *
         * The row index is mirrored (size1() - row_index - 1) before the internal Toeplitz
         * matrix is accessed. Indices are range-checked by an assertion only.
         *
         * @param row_index  Row index of accessed element
         * @param col_index  Column index of accessed element
         * @return Proxy for matrix entry
         */
        entry_proxy operator()(unsigned int row_index, unsigned int col_index)
        {
            assert(row_index < size1() && col_index < size2() && bool("Invalid access"));

            return elements_(size1() - row_index - 1, col_index);
        }

        /**
         * @brief += operation for Hankel matrices
         *
         * Forwards to operator+= of the internal Toeplitz matrices.
         *
         * @param that Matrix which will be added
         * @return Reference to this matrix after the addition
         */
        hankel_matrix& operator +=(hankel_matrix& that)
        {
            elements_ += that.elements();
            return *this;
        }

    private:
        // Copying is disabled by making both copy operations private.
        // NOTE(review): the copy constructor has an (empty) body, so code with access to
        // private members could still call it and would obtain a matrix whose elements_ is
        // default-constructed instead of copied -- presumably a declaration only was intended.
        hankel_matrix(hankel_matrix const &) {}
        hankel_matrix & operator=(hankel_matrix const & t);

        // storage: the Hankel matrix is represented by a Toeplitz matrix with mirrored rows
        toeplitz_matrix elements_;
    };

    /** @brief Copies a Hankel matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU)
    *
    *
    * @param cpu_vec   A std::vector on the host.
* @param gpu_mat A hankel_matrix from ViennaCL */ template void copy(std::vector const & cpu_vec, hankel_matrix & gpu_mat) { assert((gpu_mat.size1() * 2 - 1) == cpu_vec.size() && bool("Size mismatch")); copy(cpu_vec, gpu_mat.elements()); } /** @brief Copies a Hankel matrix from the OpenCL device (either GPU or multi-core CPU) to the std::vector * * * @param gpu_mat A hankel_matrix from ViennaCL * @param cpu_vec A std::vector on the host. */ template void copy(hankel_matrix const & gpu_mat, std::vector & cpu_vec) { assert((gpu_mat.size1() * 2 - 1) == cpu_vec.size() && bool("Size mismatch")); copy(gpu_mat.elements(), cpu_vec); } /** @brief Copies a Hankel matrix from the OpenCL device (either GPU or multi-core CPU) to the matrix-like object * * * @param han_src A hankel_matrix from ViennaCL * @param com_dst A matrix-like object */ template void copy(hankel_matrix const & han_src, MATRIXTYPE& com_dst) { assert( (viennacl::traits::size1(com_dst) == han_src.size1()) && bool("Size mismatch") ); assert( (viennacl::traits::size2(com_dst) == han_src.size2()) && bool("Size mismatch") ); vcl_size_t size = han_src.size1(); std::vector tmp(size * 2 - 1); copy(han_src, tmp); for (vcl_size_t i = 0; i < size; i++) for (vcl_size_t j = 0; j < size; j++) com_dst(i, j) = tmp[i + j]; } /** @brief Copies a the matrix-like object to the Hankel matrix from the OpenCL device (either GPU or multi-core CPU) * * * @param com_src A std::vector on the host * @param han_dst A hankel_matrix from ViennaCL */ template void copy(MATRIXTYPE const & com_src, hankel_matrix& han_dst) { assert( (han_dst.size1() == 0 || viennacl::traits::size1(com_src) == han_dst.size1()) && bool("Size mismatch") ); assert( (han_dst.size2() == 0 || viennacl::traits::size2(com_src) == han_dst.size2()) && bool("Size mismatch") ); assert( viennacl::traits::size2(com_src) == viennacl::traits::size1(com_src) && bool("Logic error: non-square Hankel matrix!") ); vcl_size_t size = viennacl::traits::size1(com_src); std::vector 
tmp(2*size - 1); for (vcl_size_t i = 0; i < size; i++) tmp[i] = com_src(0, i); for (vcl_size_t i = 1; i < size; i++) tmp[size + i - 1] = com_src(size - 1, i); viennacl::copy(tmp, han_dst); } /*template void prod_impl(hankel_matrix& mat, vector& vec, vector& result) { prod_impl(mat.elements(), vec, result); fft::reverse(result); }*/ template std::ostream & operator<<(std::ostream & s, hankel_matrix& gpu_matrix) { vcl_size_t size = gpu_matrix.size1(); std::vector tmp(2*size - 1); copy(gpu_matrix, tmp); s << "[" << size << "," << size << "]("; for(vcl_size_t i = 0; i < size; i++) { s << "("; for(vcl_size_t j = 0; j < size; j++) { s << tmp[i + j]; //s << (int)i - (int)j; if(j < (size - 1)) s << ","; } s << ")"; } s << ")"; return s; } // // Specify available operations: // /** \cond */ namespace linalg { namespace detail { // x = A * y template struct op_executor, op_assign, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { // check for the special case x = A * x if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs = temp; } else viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs); } }; template struct op_executor, op_inplace_add, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs += temp; } }; template struct op_executor, op_inplace_sub, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs -= temp; } }; // x = A * vec_op template struct op_executor, op_assign, vector_expression, const 
vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs()); viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs); } }; // x = A * vec_op template struct op_executor, op_inplace_add, vector_expression, vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs()); viennacl::vector temp_result(lhs); viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); lhs += temp_result; } }; // x = A * vec_op template struct op_executor, op_inplace_sub, vector_expression, const vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs()); viennacl::vector temp_result(lhs); viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); lhs -= temp_result; } }; } // namespace detail } // namespace linalg /** \endcond */ } #endif // VIENNACL_HANKEL_MATRIX_HPP ViennaCL-1.5.1-src/viennacl/generator/000755 001750 001750 00000000000 12267307531 017553 5ustar00rupprupp000000 000000 ViennaCL-1.5.1-src/viennacl/generator/autotune.hpp000644 001750 001750 00000017505 12267307531 022140 0ustar00rupprupp000000 000000 #ifndef VIENNACL_GENERATOR_AUTOTUNE_HPP #define VIENNACL_GENERATOR_AUTOTUNE_HPP /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/generator/autotune.hpp * * @brief User interface for the autotuning procedure */ #include #include #include #include #include "viennacl/ocl/kernel.hpp" #include "viennacl/ocl/infos.hpp" #include "viennacl/scheduler/forwards.h" #include "viennacl/generator/generate.hpp" #include "viennacl/tools/timer.hpp" namespace viennacl{ namespace generator{ namespace autotune{ /** @brief class for a tuning parameter */ class tuning_param{ public: /** @brief The constructor * * @param values The set of values which this particular tuning parameter can take */ tuning_param(std::vector const & values) : values_(values){ reset(); } /** @brief Returns true if the parameter has reached its maximum value */ bool is_max() const { return current_ == (values_.size()-1); } /** @brief Increments the parameter */ bool inc(){ ++current_ ; if(current_ < values_.size() ) return false; reset(); return true; } /** @brief Returns the current value of the parameter */ int current() const{ return values_[current_]; } /** @brief Resets the parameter to its minimum value */ void reset() { current_ = 0; } private: std::vector values_; unsigned int current_; }; /** @brief Tuning configuration * * ConfigType must have a profile_type typedef * ConfigType must implement is_invalid that returns whether or not a given parameter is invalid * ConfigType must implement create_profile that creates a profile_type given a set of parameters * * Parameters are stored in a std::map */ template class tuning_config{ private: /** @brief Storage type of the parameters */ typedef std::map params_t; public: typedef ConfigType config_type; /** @brief Accessor for profile_type */ 
typedef typename config_type::profile_type profile_type; /** @brief Add a tuning parameter to the config */ void add_tuning_param(std::string const & name, std::vector const & values){ params_.insert(std::make_pair(name,values)); } /** @brief Returns true if the tuning config has still not explored all its possibilities */ bool has_next() const{ bool res = false; for(typename params_t::const_iterator it = params_.begin() ; it != params_.end() ; ++it) res = res || !it->second.is_max(); return res; } /** @brief Update the parameters of the config */ void update(){ for(typename params_t::iterator it = params_.begin() ; it != params_.end() ; ++it) if(it->second.inc()==false) break; } /** @brief Returns true if the compilation/execution of the underlying profile has an undefined behavior */ bool is_invalid(viennacl::ocl::device const & dev) const{ return config_type::is_invalid(dev,params_); } /** @brief Returns the current profile */ typename config_type::profile_type get_current(){ return config_type::create_profile(params_); } /** @brief Reset the config */ void reset(){ for(params_t::iterator it = params_.begin() ; it != params_.end() ; ++it){ it->second.reset(); } } private: params_t params_; }; /** @brief Add the timing value for a given profile and an statement */ template double benchmark_impl(viennacl::scheduler::statement const & statement, code_generator::forced_profile_key_type key, ProfileT const & prof, unsigned int n_runs){ tools::timer t; std::list kernels; viennacl::generator::code_generator gen; gen.force_profile(key, prof); gen.add(statement, statement.array()[0]); viennacl::generator::get_configured_program(gen, kernels, true); viennacl::generator::enqueue(gen); viennacl::backend::finish(); t.start(); for(unsigned int i = 0 ; i < n_runs ; ++i) viennacl::generator::enqueue(gen); viennacl::backend::finish(); return (double)t.get()/n_runs; } /** @brief Fills a timing map for a given statement and a benchmark configuration * * @tparam OpT type of the 
statement * @tparam ConfigType type of the benchmark configuration * @param timings the timings to fill * @param op the given statement * @param key a key for forcing a particular kernel profile (i.e. to pick profile A for a device which would usually use profile B) * @param config the given configuration * @param n_runs Number of runs for the benchmark * @param out Pointer to output file stream for writing to file (if not NULL) */ template void benchmark(std::map * timings, scheduler::statement const & op, code_generator::forced_profile_key_type const & key, tuning_config & config, unsigned int n_runs, std::ofstream * out){ viennacl::ocl::device const & dev = viennacl::ocl::current_device(); unsigned int n_conf = 0; while(config.has_next()){ config.update(); typename ConfigType::profile_type const & profile = config.get_current(); if(config.is_invalid(dev) || profile.is_slow(dev)) continue; ++n_conf; } config.reset(); unsigned int n = 0; while(config.has_next()){ config.update(); typename ConfigType::profile_type const & profile = config.get_current(); if(config.is_invalid(dev) || profile.is_slow(dev)) continue; double percent = (double)n++*100/n_conf; double exec_time = benchmark_impl(op,key,profile,n_runs); timings->insert(std::make_pair(exec_time, profile)); std::cout << '\r' << "Autotuning..." << "[" << std::setprecision(2) << std::setfill (' ') << std::setw(6) << std::fixed << percent << "%" << "]" << " | Best : " << timings->begin()->second << " => " << std::scientific << std::right << std::setprecision(2) << timings->begin()->first << std::flush; if(out) *out << std::setprecision(3) << std::scientific << exec_time << "," << profile.csv_representation() << std::endl ; } std::cout << '\r' << "Autotuning..." 
<< "[100.00%]" << std::endl; } } } } #endif // AUTOTUNE_HPP ViennaCL-1.5.1-src/viennacl/generator/vector_reduction.hpp000644 001750 001750 00000024610 12267307531 023645 0ustar00rupprupp000000 000000 #ifndef VIENNACL_GENERATOR_GENERATE_VECTOR_REDUCTION_HPP #define VIENNACL_GENERATOR_GENERATE_VECTOR_REDUCTION_HPP /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/generator/vector_reduction.hpp * * @brief Kernel template for the vector reduction operation */ #include #include "viennacl/scheduler/forwards.h" #include "viennacl/generator/mapped_objects.hpp" #include "viennacl/generator/helpers.hpp" #include "viennacl/generator/utils.hpp" #include "viennacl/generator/profile_base.hpp" #include "viennacl/tools/tools.hpp" namespace viennacl{ namespace generator{ /** @brief OpenCL kernel template for reductions resulting in a vector. Example: Computing the row norms of a matrix concurrently. 
*/ class vector_reduction : public profile_base{ vcl_size_t lmem_used(vcl_size_t scalartype_size) const { return m_*(k_+1)*scalartype_size; } public: /** @brief The user constructor */ vector_reduction(unsigned int vectorization, unsigned int m, unsigned int k, unsigned int num_groups) : profile_base(vectorization, m, k, 1), m_(m), k_(k), num_groups_(num_groups){ } static std::string csv_format() { return "Vec,M,K,NumGroups"; } std::string csv_representation() const{ std::ostringstream oss; oss << vector_size_ << "," << m_ << "," << k_ << "," << num_groups_; return oss.str(); } unsigned int m() const { return m_; } unsigned int k() const { return k_; } unsigned int num_groups() const { return num_groups_; } void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type const & statements, viennacl::ocl::kernel & kernel, unsigned int & n_arg) const{ configure_local_sizes(kernel, kernel_id); kernel.global_work_size(0,m_*num_groups_); kernel.global_work_size(1,k_); for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ scheduler::statement::container_type exprs = it->first.array(); for(scheduler::statement::container_type::iterator iit = exprs.begin() ; iit != exprs.end() ; ++iit){ if(iit->op.type==scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE){ scheduler::statement_node const * current_node = &(*iit); //The LHS of the prod is a matrix if(current_node->lhs.type_family==scheduler::MATRIX_TYPE_FAMILY) { kernel.arg(n_arg++, cl_uint(utils::call_on_matrix(current_node->lhs, utils::internal_size1_fun()))); kernel.arg(n_arg++, cl_uint(utils::call_on_matrix(current_node->lhs, utils::internal_size2_fun()))); return; } else{ //The LHS of the prod is a matrix expression current_node = &exprs[current_node->lhs.node_index]; if(current_node->lhs.type_family==scheduler::MATRIX_TYPE_FAMILY) { kernel.arg(n_arg++, cl_uint(utils::call_on_matrix(current_node->lhs, utils::internal_size1_fun()))); kernel.arg(n_arg++, 
cl_uint(utils::call_on_matrix(current_node->lhs, utils::internal_size2_fun()))); return; } else if(current_node->rhs.type_family==scheduler::MATRIX_TYPE_FAMILY) { kernel.arg(n_arg++, cl_uint(utils::call_on_matrix(current_node->lhs, utils::internal_size1_fun()))); kernel.arg(n_arg++, cl_uint(utils::call_on_matrix(current_node->lhs, utils::internal_size2_fun()))); return; } else{ assert(false && bool("unexpected expression tree")); } } return; } } } } void kernel_arguments(statements_type const & /*statements*/, std::string & arguments_string) const{ arguments_string += detail::generate_value_kernel_argument("unsigned int", "M"); arguments_string += detail::generate_value_kernel_argument("unsigned int", "N"); } private: void core(vcl_size_t /*kernel_id*/, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector const & mapping) const { std::vector exprs; for(std::vector::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it){ for(detail::mapping_type::const_iterator iit = it->begin() ; iit != it->end() ; ++iit){ if(detail::mapped_vector_reduction * p = dynamic_cast(iit->second.get())) exprs.push_back(p); if(detail::mapped_matrix * p = dynamic_cast(iit->second.get())) p->bind_sizes("M","N"); } } vcl_size_t lsize1 = m_; vcl_size_t lsize2 = k_+1; std::string scalartype = "float"; bool is_lhs_transposed = false; if(exprs.front()->root_node().lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) if(exprs.front()->statement().array()[exprs.front()->root_node().lhs.node_index].op.type==scheduler::OPERATION_UNARY_TRANS_TYPE) is_lhs_transposed = true; std::string size1 = "M", size2 = "N"; if(is_lhs_transposed) std::swap(size1, size2); for(std::vector::iterator it = exprs.begin() ; it != exprs.end() ; ++it){ stream << "__local " << (*it)->scalartype() << " buf" << std::distance(exprs.begin(), it) << '[' << lsize1*lsize2 << "];" << std::endl; } stream << "unsigned int lid0 = get_local_id(0);" << std::endl; stream << "unsigned int 
lid1 = get_local_id(1);" << std::endl; stream << "for(unsigned int r = get_global_id(0) ; r < " << size1 << " ; r += get_global_size(0)){" << std::endl; stream.inc_tab(); for(vcl_size_t k = 0 ; k < exprs.size() ; ++k) stream << scalartype << " sum" << k << " = 0;" << std::endl; stream << "for(unsigned int c = get_local_id(1) ; c < " << size2 << " ; c += get_local_size(1)){" << std::endl; stream.inc_tab(); std::set fetched; for(std::vector::iterator it = exprs.begin() ; it != exprs.end() ; ++it){ viennacl::scheduler::statement const & statement = (*it)->statement(); viennacl::scheduler::statement_node const & root_node = (*it)->root_node(); if(is_lhs_transposed) detail::fetch_all_lhs(fetched,statement,root_node, std::make_pair("c", "r"),vector_size_,stream,(*it)->mapping()); else detail::fetch_all_lhs(fetched,statement,root_node, std::make_pair("r", "c"),vector_size_,stream,(*it)->mapping()); detail::fetch_all_rhs(fetched,statement,root_node, std::make_pair("c", "0"),vector_size_,stream,(*it)->mapping()); } //Update sums; for(std::vector::iterator it = exprs.begin() ; it != exprs.end() ; ++it){ viennacl::scheduler::statement const & statement = (*it)->statement(); viennacl::scheduler::statement_node const & root_node = (*it)->root_node(); std::string str; detail::generate_all_lhs(statement,root_node,std::make_pair("i","0"),-1,str,(*it)->mapping()); str += "*"; detail::generate_all_rhs(statement,root_node,std::make_pair("i","0"),-1,str,(*it)->mapping()); stream << " sum" << std::distance(exprs.begin(),it) << " += " << str << ";" << std::endl; } stream.dec_tab(); stream << "}" << std::endl; for(vcl_size_t k = 0 ; k < exprs.size() ; ++k){ stream << "buf" << k << "[lid0*" << lsize2 << "+ lid1] = sum" << k << ";" << std::endl; } for(unsigned int stride = k_/2 ; stride>1 ; stride /=2){ stream << "barrier(CLK_LOCAL_MEM_FENCE); " << std::endl; stream << "if(lid1 < " << stride << ")" ; stream << "{" << std::endl; stream.inc_tab(); for(vcl_size_t i = 0 ; i < exprs.size() ; 
++i) stream << "buf" << i << "[lid0*" << lsize2 << "+ lid1] += buf" << i << "[lid0*" << lsize2 << "+ lid1 + " << stride << "];" << std::endl; stream.dec_tab(); stream << "}" << std::endl; } stream << "barrier(CLK_LOCAL_MEM_FENCE); " << std::endl; stream << "if(lid1 == 0)" ; stream << "{" << std::endl; stream.inc_tab(); for(vcl_size_t i = 0 ; i < exprs.size() ; ++i){ stream << "buf" << i << "[lid0*" << lsize2 << "] += buf" << i << "[lid0*" << lsize2 << "+ 1];" << std::endl; exprs[i]->access_name("buf"+utils::to_string(i)+"[lid0*"+utils::to_string(lsize2)+"]"); } vcl_size_t i = 0; for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ std::string str; detail::traverse(it->first, it->second, detail::expression_generation_traversal(std::make_pair("r","0"), -1, str, mapping[i++]), false); stream << str << ";" << std::endl; } stream.dec_tab(); stream << "}" << std::endl; stream.dec_tab(); stream << "}" << std::endl; } private: unsigned int m_; unsigned int k_; unsigned int num_groups_; }; } } #endif ViennaCL-1.5.1-src/viennacl/generator/forwards.h000644 001750 001750 00000012613 12267307531 021556 0ustar00rupprupp000000 000000 #ifndef VIENNACL_GENERATOR_FORWARDS_H #define VIENNACL_GENERATOR_FORWARDS_H /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/generator/forwards.h @brief Forwards declaration */ #include #include #include #include #include "viennacl/tools/shared_ptr.hpp" #include "viennacl/scheduler/forwards.h" namespace viennacl{ namespace generator{ inline void generate_enqueue_statement(viennacl::scheduler::statement const & s, scheduler::statement_node const & root_node); inline void generate_enqueue_statement(viennacl::scheduler::statement const & s); enum expression_type_family{ SCALAR_SAXPY_FAMILY, VECTOR_SAXPY_FAMILY, MATRIX_SAXPY_FAMILY, SCALAR_REDUCE_FAMILY, VECTOR_REDUCE_FAMILY, MATRIX_PRODUCT_FAMILY, INVALID_EXPRESSION_FAMILY }; enum expression_type{ SCALAR_SAXPY_TYPE, VECTOR_SAXPY_TYPE, MATRIX_SAXPY_TYPE, SCALAR_REDUCE_TYPE, VECTOR_REDUCE_Nx_TYPE, VECTOR_REDUCE_Tx_TYPE, MATRIX_PRODUCT_NN_TYPE, MATRIX_PRODUCT_TN_TYPE, MATRIX_PRODUCT_NT_TYPE, MATRIX_PRODUCT_TT_TYPE, INVALID_EXPRESSION_TYPE }; inline const char * expression_type_to_string(expression_type type){ switch(type){ case SCALAR_SAXPY_TYPE : return "Scalar SAXPY"; case VECTOR_SAXPY_TYPE : return "Vector SAXPY"; case MATRIX_SAXPY_TYPE : return "Matrix SAXPY"; case SCALAR_REDUCE_TYPE : return "Inner Product"; case VECTOR_REDUCE_Nx_TYPE : return "Matrix-Vector Product : Ax"; case VECTOR_REDUCE_Tx_TYPE : return "Matrix-Vector Product : Tx"; case MATRIX_PRODUCT_NN_TYPE : return "Matrix-Matrix Product : AA"; case MATRIX_PRODUCT_TN_TYPE : return "Matrix-Matrix Product : TA"; case MATRIX_PRODUCT_NT_TYPE : return "Matrix-Matrix Product : AT"; case MATRIX_PRODUCT_TT_TYPE : return "Matrix-Matrix Product : TT"; default : return "INVALID EXPRESSION"; } } typedef std::pair expression_key_type; /** @brief A 
class for holding meta information such as the type or the underlying scalar type of an expression (such as x = inner_prod(y, z)).
    *
    * make_key() combines `type` and `scalartype_size` into an expression_key_type;
    * operator== compares all three data members.
    */
    struct expression_descriptor{
      expression_key_type make_key() const { return expression_key_type(type,scalartype_size); }
      bool operator==(expression_descriptor const & other) const
      {
        return type_family == other.type_family && type == other.type && scalartype_size==other.scalartype_size;
      }
      expression_type_family type_family;
      expression_type type;
      vcl_size_t scalartype_size;
    };

    /** @brief Emulation of C++11's .at() member for std::map<>
    *
    * @param map  The map to search
    * @param key  The key to look up
    * @return Const reference to the mapped value
    * @throws std::out_of_range if the key is not present
    *
    * NOTE(review): the template parameter list and the std::map template arguments are missing in
    * this copy of the file -- restore from a pristine source before compiling.
    */
    template
    ValueT const & at(std::map const & map, KeyT const & key)
    {
      typename std::map::const_iterator it = map.find(key);
      if (it != map.end())
        return it->second;

      throw std::out_of_range("Generator: Key not found in map");
    }

    namespace utils{
      // Only forward-declared here.
      class kernel_generation_stream;
    }

    namespace detail{

      /** @brief Position of an element relative to a statement node: its left operand, the node itself, or its right operand. */
      enum node_type{
        LHS_NODE_TYPE,
        PARENT_NODE_TYPE,
        RHS_NODE_TYPE
      };

      class mapped_object;

      // NOTE(review): template arguments of the three typedefs below are missing in this copy.
      // Call sites in helpers.hpp look mapping entries up with std::make_pair(root_node, node_type)
      // and dereference the result to obtain a mapped_object.
      typedef std::pair key_type;
      typedef tools::shared_ptr container_ptr_type;
      typedef std::map mapping_type;

      // Forward declarations; the default argument of traverse() lives here and must not be
      // repeated at the point of definition.
      template
      static void traverse(viennacl::scheduler::statement const & statement, viennacl::scheduler::statement_node const & root_node, Fun const & fun, bool recurse_binary_leaf = true);
      inline std::string generate(std::pair const & index, int vector_element, mapped_object const & s);
      static std::string & append_kernel_arguments(std::set & already_generated, std::string & str, unsigned int vector_size, mapped_object const & s);
      static void fetch(std::pair const & index, unsigned int vectorization, std::set & fetched, utils::kernel_generation_stream & stream, mapped_object & s);
      inline const char * generate(viennacl::scheduler::operation_node_type type);
      static void generate_all_rhs(viennacl::scheduler::statement const & statement
                                , viennacl::scheduler::statement_node const & root_node
                                , std::pair const & index
                                , int vector_element
                                , std::string & str
                                , detail::mapping_type const & mapping);

    }

  }

}
#endif
ViennaCL-1.5.1-src/viennacl/generator/statement_representation_functor.hpp000644 001750 001750 00000013623 12267307531 027157 0ustar00rupprupp000000 000000 #ifndef VIENNACL_GENERATOR_STATEMENT_REPRESENTATION_HPP #define VIENNACL_GENERATOR_STATEMENT_REPRESENTATION_HPP /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/generator/statement_representation_functor.hpp @brief Functor to generate the string id of a statement */ #include #include #include "viennacl/forwards.h" #include "viennacl/scheduler/forwards.h" #include "viennacl/generator/forwards.h" #include "viennacl/tools/shared_ptr.hpp" #include "viennacl/ocl/backend.hpp" #include "viennacl/ocl/kernel.hpp" #include "viennacl/traits/start.hpp" #include "viennacl/traits/stride.hpp" #include "viennacl/generator/helpers.hpp" #include "viennacl/generator/utils.hpp" #include "viennacl/generator/mapped_objects.hpp" namespace viennacl{ namespace generator{ namespace detail{ /** @brief Helper class for the OpenCL kernel generator, representing a statement. 
*/ class statement_representation_functor : public traversal_functor{ private: unsigned int get_id(void * handle) const{ unsigned int i = 0; for( ; i < 64 ; ++i){ void* current = memory_[i]; if(current==NULL) break; if(current==handle) return i; } memory_[i] = handle; return i; } static void append_id(char * & ptr, unsigned int val){ if(val==0) *ptr++='0'; else while(val>0) { *ptr++=static_cast('0') + static_cast(val % 10); val /= 10; } } public: typedef void result_type; statement_representation_functor(void* (&memory)[64], unsigned int , char *& ptr) : memory_(memory), ptr_(ptr){ } template result_type operator()(ScalarType const & /*scal*/) const { *ptr_++='h'; //host *ptr_++='s'; //scalar *ptr_++=utils::first_letter_of_type::value(); } /** @brief Scalar mapping */ template result_type operator()(scalar const & scal) const { *ptr_++='s'; //scalar *ptr_++=utils::first_letter_of_type::value(); append_id(ptr_, get_id((void*)&scal)); } /** @brief Vector mapping */ template result_type operator()(vector_base const & vec) const { *ptr_++='v'; //vector if(viennacl::traits::start(vec)>0) *ptr_++='r'; if(vec.stride()>1) *ptr_++='s'; *ptr_++=utils::first_letter_of_type::value(); append_id(ptr_, get_id((void*)&vec)); } /** @brief Implicit vector mapping */ template result_type operator()(implicit_vector_base const & vec) const { *ptr_++='i'; //implicit *ptr_++='v'; //vector if(vec.is_value_static()) *ptr_++='v'; //value if(vec.has_index()) *ptr_++='i'; *ptr_++=utils::first_letter_of_type::value(); } /** @brief Matrix mapping */ template result_type operator()(matrix_base const & mat) const { *ptr_++='m'; //vector if(viennacl::traits::start1(mat)>0) *ptr_++='r'; if(viennacl::traits::stride1(mat)>1) *ptr_++='s'; if(viennacl::traits::start2(mat)>0) *ptr_++='r'; if(viennacl::traits::stride2(mat)>1) *ptr_++='s'; *ptr_++=utils::first_letter_of_type::value(); *ptr_++=utils::first_letter_of_type::value(); append_id(ptr_, get_id((void*)&mat)); } /** @brief Implicit matrix mapping 
*/ template result_type operator()(implicit_matrix_base const & mat) const { *ptr_++='i'; //implicit *ptr_++='m'; //matrix if(mat.is_value_static()) *ptr_++='v'; //value *ptr_++=utils::first_letter_of_type::value(); } void operator()(scheduler::statement const *, scheduler::statement_node const * root_node, detail::node_type node_type) const { if(node_type==LHS_NODE_TYPE && root_node->lhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY) utils::call_on_element(root_node->lhs, *this); else if(node_type==RHS_NODE_TYPE && root_node->rhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY) utils::call_on_element(root_node->rhs, *this); else if(node_type==PARENT_NODE_TYPE){ const char * op_expr = detail::generate(root_node->op.type); vcl_size_t n = std::strlen(op_expr); std::memcpy(ptr_, op_expr, n); ptr_+=n; } } private: void* (&memory_)[64]; char *& ptr_; }; } } } #endif ViennaCL-1.5.1-src/viennacl/generator/helpers.hpp000644 001750 001750 00000035154 12267307531 021736 0ustar00rupprupp000000 000000 #ifndef VIENNACL_GENERATOR_GENERATE_UTILS_HPP #define VIENNACL_GENERATOR_GENERATE_UTILS_HPP /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/generator/helpers.hpp @brief several code generation helpers */ #include #ifdef __APPLE__ #include #else #include "CL/cl.h" #endif #include "viennacl/forwards.h" #include "viennacl/scheduler/forwards.h" #include "viennacl/generator/utils.hpp" #include "viennacl/generator/forwards.h" namespace viennacl{ namespace generator{ namespace detail{ /** @brief generate the string for a pointer kernel argument */ static std::string generate_value_kernel_argument(std::string const & scalartype, std::string const & name){ return scalartype + ' ' + name + ","; } /** @brief generate the string for a pointer kernel argument */ static std::string generate_pointer_kernel_argument(std::string const & address_space, std::string const & scalartype, std::string const & name){ return address_space + " " + scalartype + "* " + name + ","; } /** @brief generate a string from an operation_node_type */ inline const char * generate(viennacl::scheduler::operation_node_type type){ // unary expression switch(type){ case viennacl::scheduler::OPERATION_UNARY_ABS_TYPE : return "abs"; case viennacl::scheduler::OPERATION_UNARY_TRANS_TYPE : return "trans"; case viennacl::scheduler::OPERATION_BINARY_ASSIGN_TYPE : return "="; case viennacl::scheduler::OPERATION_BINARY_INPLACE_ADD_TYPE : return "+="; case viennacl::scheduler::OPERATION_BINARY_INPLACE_SUB_TYPE : return "-="; case viennacl::scheduler::OPERATION_BINARY_ADD_TYPE : return "+"; case viennacl::scheduler::OPERATION_BINARY_SUB_TYPE : return "-"; case viennacl::scheduler::OPERATION_BINARY_MULT_TYPE : return "*"; case viennacl::scheduler::OPERATION_BINARY_DIV_TYPE : return "/"; case 
viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE : return "iprod"; case viennacl::scheduler::OPERATION_BINARY_MAT_MAT_PROD_TYPE : return "mmprod"; case viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE : return "mvprod"; case viennacl::scheduler::OPERATION_BINARY_ACCESS_TYPE : return "[]"; default : throw "not implemented"; } } /** @brief checks whether an operator is both a binary node and a leaf */ inline bool is_binary_leaf_operator(viennacl::scheduler::operation_node_type const & op_type) { return op_type == viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE ||op_type == viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE ||op_type == viennacl::scheduler::OPERATION_BINARY_MAT_MAT_PROD_TYPE; } /** @brief checks whether an operator is arithmetic or not */ inline bool is_arithmetic_operator(viennacl::scheduler::operation_node_type const & op_type) { return op_type == viennacl::scheduler::OPERATION_BINARY_ASSIGN_TYPE ||op_type == viennacl::scheduler::OPERATION_BINARY_ADD_TYPE ||op_type == viennacl::scheduler::OPERATION_BINARY_DIV_TYPE ||op_type == viennacl::scheduler::OPERATION_BINARY_ELEMENT_DIV_TYPE ||op_type == viennacl::scheduler::OPERATION_BINARY_ELEMENT_PROD_TYPE ||op_type == viennacl::scheduler::OPERATION_BINARY_INPLACE_ADD_TYPE ||op_type == viennacl::scheduler::OPERATION_BINARY_INPLACE_SUB_TYPE // ||op_type == viennacl::scheduler::OPERATION_BINARY_INPLACE_DIV_TYPE // ||op_type == viennacl::scheduler::OPERATION_BINARY_INPLACE_MULT_TYPE ||op_type == viennacl::scheduler::OPERATION_BINARY_MULT_TYPE ||op_type == viennacl::scheduler::OPERATION_BINARY_SUB_TYPE; } /** @brief Recursively execute a functor on a statement */ template static void traverse(viennacl::scheduler::statement const & statement, viennacl::scheduler::statement_node const & root_node, Fun const & fun, bool recurse_binary_leaf /* see forwards.h for default argument */){ if(root_node.op.type_family==viennacl::scheduler::OPERATION_UNARY_TYPE_FAMILY) { //Self: fun(&statement, 
&root_node, PARENT_NODE_TYPE); //Lhs: fun.call_before_expansion(); if(root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) traverse(statement, statement.array()[root_node.lhs.node_index], fun, recurse_binary_leaf); fun(&statement, &root_node, LHS_NODE_TYPE); fun.call_after_expansion(); } else if(root_node.op.type_family==viennacl::scheduler::OPERATION_BINARY_TYPE_FAMILY) { bool deep_recursion = recurse_binary_leaf || !is_binary_leaf_operator(root_node.op.type); fun.call_before_expansion(); //Lhs: if(deep_recursion){ if(root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) traverse(statement, statement.array()[root_node.lhs.node_index], fun, recurse_binary_leaf); fun(&statement, &root_node, LHS_NODE_TYPE); } //Self: fun(&statement, &root_node, PARENT_NODE_TYPE); //Rhs: if(deep_recursion){ if(root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) traverse(statement, statement.array()[root_node.rhs.node_index], fun, recurse_binary_leaf); fun(&statement, &root_node, RHS_NODE_TYPE); } fun.call_after_expansion(); } } /** @brief base functor class for traversing a statement */ class traversal_functor{ public: void call_before_expansion() const { } void call_after_expansion() const { } }; /** @brief functor for generating the prototype of a statement */ class prototype_generation_traversal : public traversal_functor{ private: std::set & already_generated_; std::string & str_; unsigned int vector_size_; mapping_type const & mapping_; public: prototype_generation_traversal(std::set & already_generated, std::string & str, unsigned int vector_size, mapping_type const & mapping) : already_generated_(already_generated), str_(str), vector_size_(vector_size), mapping_(mapping){ } void operator()(viennacl::scheduler::statement const *, viennacl::scheduler::statement_node const * root_node, detail::node_type node_type) const { if( (node_type==detail::LHS_NODE_TYPE && 
root_node->lhs.type_family!=viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) ||(node_type==detail::RHS_NODE_TYPE && root_node->rhs.type_family!=viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) ) append_kernel_arguments(already_generated_, str_, vector_size_, *at(mapping_, std::make_pair(root_node,node_type))); } }; /** @brief functor for fetching the elements of a statement */ class fetch_traversal : public traversal_functor{ private: std::set & fetched_; std::pair index_string_; unsigned int vectorization_; utils::kernel_generation_stream & stream_; mapping_type const & mapping_; public: fetch_traversal(std::set & fetched, std::pair const & index, unsigned int vectorization, utils::kernel_generation_stream & stream, mapping_type const & mapping) : fetched_(fetched), index_string_(index), vectorization_(vectorization), stream_(stream), mapping_(mapping){ } void operator()(viennacl::scheduler::statement const *, viennacl::scheduler::statement_node const * root_node, detail::node_type node_type) const { if( (node_type==detail::LHS_NODE_TYPE && root_node->lhs.type_family!=viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) ||(node_type==detail::RHS_NODE_TYPE && root_node->rhs.type_family!=viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) ) fetch(index_string_, vectorization_, fetched_, stream_, *at(mapping_, std::make_pair(root_node, node_type))); } }; /** @brief functor for fetching the LHS of a statement's node * * Forwards to fetch_traversal functor if the LHS is not a leaf */ static void fetch_all_lhs(std::set & fetched , viennacl::scheduler::statement const & statement , viennacl::scheduler::statement_node const & root_node , std::pair const & index , vcl_size_t const & vectorization , utils::kernel_generation_stream & stream , detail::mapping_type const & mapping){ if(root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) detail::traverse(statement, statement.array()[root_node.lhs.node_index], detail::fetch_traversal(fetched, index, 
static_cast(vectorization), stream, mapping)); else detail::fetch(index, static_cast(vectorization),fetched, stream, *at(mapping, std::make_pair(&root_node,detail::LHS_NODE_TYPE))); } /** @brief functor for fetching the RHS of a statement's node * * Forwards to fetch_traversal functor if the RHS is not a leaf */ static void fetch_all_rhs(std::set & fetched , viennacl::scheduler::statement const & statement , viennacl::scheduler::statement_node const & root_node , std::pair const & index , vcl_size_t const & vectorization , utils::kernel_generation_stream & stream , detail::mapping_type const & mapping){ if(root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) detail::traverse(statement, statement.array()[root_node.rhs.node_index], detail::fetch_traversal(fetched, index, static_cast(vectorization), stream, mapping)); else detail::fetch(index, static_cast(vectorization),fetched, stream, *at(mapping, std::make_pair(&root_node,detail::RHS_NODE_TYPE))); } /** @brief functor for generating the expression string from a statement */ class expression_generation_traversal : public traversal_functor{ private: std::pair index_string_; int vector_element_; std::string & str_; mapping_type const & mapping_; public: expression_generation_traversal(std::pair const & index, int vector_element, std::string & str, mapping_type const & mapping) : index_string_(index), vector_element_(vector_element), str_(str), mapping_(mapping){ } void call_before_expansion() const { str_+="("; } void call_after_expansion() const { str_+=")"; } void operator()(viennacl::scheduler::statement const *, viennacl::scheduler::statement_node const * root_node, detail::node_type node_type) const { if(node_type==PARENT_NODE_TYPE) { if(is_binary_leaf_operator(root_node->op.type)) str_ += generate(index_string_, vector_element_, *at(mapping_, std::make_pair(root_node, node_type))); else if(is_arithmetic_operator(root_node->op.type)) str_ += generate(root_node->op.type); } else{ 
if(node_type==LHS_NODE_TYPE){ if(root_node->lhs.type_family!=viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) str_ += detail::generate(index_string_,vector_element_, *at(mapping_, std::make_pair(root_node,node_type))); } else if(node_type==RHS_NODE_TYPE){ if(root_node->rhs.type_family!=viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) str_ += detail::generate(index_string_,vector_element_, *at(mapping_, std::make_pair(root_node,node_type))); } } } }; static void generate_all_lhs(viennacl::scheduler::statement const & statement , viennacl::scheduler::statement_node const & root_node , std::pair const & index , int vector_element , std::string & str , detail::mapping_type const & mapping){ if(root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) detail::traverse(statement, statement.array()[root_node.lhs.node_index], detail::expression_generation_traversal(index, vector_element, str, mapping)); else str += detail::generate(index, vector_element,*at(mapping, std::make_pair(&root_node,detail::LHS_NODE_TYPE))); } static void generate_all_rhs(viennacl::scheduler::statement const & statement , viennacl::scheduler::statement_node const & root_node , std::pair const & index , int vector_element , std::string & str , detail::mapping_type const & mapping){ if(root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) detail::traverse(statement, statement.array()[root_node.rhs.node_index], detail::expression_generation_traversal(index, vector_element, str, mapping)); else str += detail::generate(index, vector_element,*at(mapping, std::make_pair(&root_node,detail::RHS_NODE_TYPE))); } } } } #endif ViennaCL-1.5.1-src/viennacl/generator/profile_base.hpp000644 001750 001750 00000020041 12267307531 022713 0ustar00rupprupp000000 000000 #ifndef VIENNACL_GENERATOR_GENERATE_TEMPLATE_BASE_BASE #define VIENNACL_GENERATOR_GENERATE_TEMPLATE_BASE_BASE /* ========================================================================= Copyright (c) 2010-2014, 
Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/generator/profile_base.hpp * * @brief Base classes for the profiles */ #include #include #include "viennacl/ocl/backend.hpp" #include "viennacl/ocl/kernel.hpp" #include "viennacl/ocl/device.hpp" #include "viennacl/ocl/device_utils.hpp" #include "viennacl/ocl/infos.hpp" #include "viennacl/scheduler/forwards.h" #include "viennacl/generator/helpers.hpp" #include "viennacl/generator/map_functor.hpp" namespace viennacl{ namespace generator{ /** @brief Base class for an operation profile */ class profile_base{ public: typedef std::list< std::pair > statements_type; protected: friend std::ostream & operator<<(std::ostream &, profile_base const &); virtual bool invalid_impl(viennacl::ocl::device const & /*dev*/, vcl_size_t /*scalartype_size*/) const { return false; } virtual bool is_slow_impl(viennacl::ocl::device const &) const { return false; } virtual vcl_size_t lmem_used(vcl_size_t /*scalartype_size*/) const { return 0; } void configure_local_sizes(viennacl::ocl::kernel & k, vcl_size_t /*kernel_id*/) const { k.local_work_size(0,local_size_1_); k.local_work_size(1,local_size_2_); } virtual void print(std::ostream & s) const{ s << csv_representation(); } /** @brief Generates the body of the associated kernel function * * @param kernel_id If this profile requires multiple kernel, the index for which the core should be generated * @param stream The output stream the kernel is written to * @param statements the statements for which the code should be generated * @param mapping 
the mapping of the statement_nodes to the mapped_objects */ virtual void core(vcl_size_t kernel_id, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector const & mapping) const = 0; public: /** @brief The constructor */ profile_base(unsigned int vectorization, vcl_size_t local_size_1, vcl_size_t local_size_2, vcl_size_t num_kernels) : vector_size_(vectorization), local_size_1_(local_size_1), local_size_2_(local_size_2), num_kernels_(num_kernels){ } /** @brief The destructor */ virtual ~profile_base(){ } /** @brief Configures the range and enqueues the arguments associated with the profile */ virtual void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type const & statements, viennacl::ocl::kernel & k, unsigned int & n_arg) const = 0; virtual void kernel_arguments(statements_type const & statements, std::string & arguments_string) const = 0; /** @brief Get the vector size of the kernel */ unsigned int vector_size() const { return vector_size_; } /** @brief csv representation of an operation * * Useful when writing to a file */ virtual std::string csv_representation() const = 0; /** @brief returns whether or not the profile is likely to be slow on a particular device * @param dev the given device*/ bool is_slow(viennacl::ocl::device const & dev) const{ bool res = false; if(dev.type()==CL_DEVICE_TYPE_GPU){ vcl_size_t warp_size = 32; if(dev.vendor_id()==4098) warp_size = 64; res = static_cast(((local_size_1_*local_size_2_)%warp_size)>0); } return res || is_slow_impl(dev); } /** @brief returns whether or not the profile leads to undefined behavior on particular device * @param dev the given device * @param scalartype_size Local memory required to execute the kernel */ bool is_invalid(viennacl::ocl::device const & dev, vcl_size_t scalartype_size) const{ //Query device informations vcl_size_t lmem_available = static_cast(dev.local_mem_size()); vcl_size_t max_workgroup_size = dev.max_work_group_size(); std::vector 
max_work_item_sizes = dev.max_work_item_sizes(); bool invalid_work_group_sizes = local_size_1_*local_size_2_ > max_workgroup_size || local_size_1_ > max_work_item_sizes[0] || local_size_2_ > max_work_item_sizes[1]; // uses too much resources return invalid_work_group_sizes || lmem_used(scalartype_size)>lmem_available || invalid_impl(dev, scalartype_size); } /** @brief Returns the number of kernels needed by this operation */ vcl_size_t num_kernels() const{ return num_kernels_; } /** @brief Generates the code associated with this profile onto the provided stream * Redirects to the virtual core() method * * @param stream Stream onto which the code should be generated * @param device_offset the index of the device in the context (used for the kernel name) * @param statements the statements associated with this profile */ virtual void operator()(utils::kernel_generation_stream & stream, vcl_size_t device_offset, statements_type const & statements) const { std::vector mapping(statements.size()); ///Get Prototype, initialize mapping std::string prototype; std::set already_generated; kernel_arguments(statements, prototype); { std::map memory; unsigned int current_arg = 0; vcl_size_t i = 0; for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it) detail::traverse(it->first, it->second, detail::map_functor(memory,current_arg,mapping[i++])); } for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ detail::traverse(it->first, it->second, detail::prototype_generation_traversal(already_generated, prototype, vector_size(), mapping[std::distance(statements.begin(), it)])); } prototype.erase(prototype.size()-1); //Last comma pruned //Generate for(vcl_size_t n = 0 ; n < num_kernels() ; ++n){ //stream << "__attribute__((vec_type_hint()))" << std::endl; stream << " __attribute__((reqd_work_group_size(" << local_size_1_ << "," << local_size_2_ << "," << 1 << ")))" << std::endl; stream << "__kernel " << "void " 
<< "kernel_" << device_offset << "_" << n << "(" << std::endl; stream << prototype << std::endl; stream << ")" << std::endl; //core: stream << "{" << std::endl; stream.inc_tab(); core(n, stream, statements, mapping); stream.dec_tab(); stream << "}" << std::endl; } } protected: unsigned int vector_size_; vcl_size_t local_size_1_; vcl_size_t local_size_2_; vcl_size_t num_kernels_; }; inline std::ostream & operator<<(std::ostream & os, profile_base const & profile){ profile.print(os); return os; } } } #endif ViennaCL-1.5.1-src/viennacl/generator/mapped_objects.hpp000644 001750 001750 00000035024 12267307531 023247 0ustar00rupprupp000000 000000 #ifndef VIENNACL_GENERATOR_MAPPED_TYPE_HPP #define VIENNACL_GENERATOR_MAPPED_TYPE_HPP /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/generator/mapped_objects.hpp @brief Map ViennaCL objects to generator wrappers */ #include #include "viennacl/scheduler/forwards.h" #include "viennacl/generator/forwards.h" #include "viennacl/generator/utils.hpp" namespace viennacl{ namespace generator{ namespace detail{ /** @brief Base class for mapping viennacl datastructure to generator-friendly structures */ class mapped_object{ protected: /** \cond */ struct node_info{ node_info() : mapping(NULL), statement(NULL), root_node(NULL) { } mapping_type const * mapping; scheduler::statement const * statement; scheduler::statement_node const * root_node; }; /** \endcond */ virtual std::string 
generate_default(std::pair const & index) const = 0; virtual std::string append_vector_size(std::string const & scalartype, unsigned int) const { return scalartype; } public: mapped_object(std::string const & scalartype) : scalartype_(scalartype){ } virtual std::string & append_kernel_arguments(std::set &, std::string & str, unsigned int) const{ return str; } std::string const & scalartype() const { return scalartype_; } void access_name(std::string const & str) { access_name_ = str; } std::string const & access_name() const { return access_name_; } virtual std::string generate(std::pair const & index, int) const{ if(!access_name_.empty()) return access_name_; else return generate_default(index); } virtual ~mapped_object(){ } protected: std::string access_name_; std::string scalartype_; }; /** @brief Base class for mapping binary leaves (inner product-based, matrix vector product-base, matrix-matrix product based...) */ class mapped_binary_leaf : public mapped_object{ public: mapped_binary_leaf(std::string const & scalartype) : mapped_object(scalartype){ } mapping_type const & mapping() const { return *info_.mapping; } scheduler::statement const & statement() const { return *info_.statement; } scheduler::statement_node const & root_node() const { return *info_.root_node; } std::string generate_default(std::pair const &) const { return "";} protected: node_info info_; }; /** @brief Mapping of a matrix product */ class mapped_matrix_product : public mapped_binary_leaf{ friend class map_functor; public: mapped_matrix_product(std::string const & scalartype) : mapped_binary_leaf(scalartype){ } }; /** @brief Base class for mapping a reduction */ class mapped_reduction : public mapped_binary_leaf{ public: mapped_reduction(std::string const & scalartype) : mapped_binary_leaf(scalartype){ } viennacl::scheduler::operation_node_type reduction_type() const { return reduction_type_; } private: viennacl::scheduler::operation_node_type reduction_type_; }; /** @brief Mapping of a 
scalar reduction (based on inner product) */ class mapped_scalar_reduction : public mapped_reduction{ friend class map_functor; public: mapped_scalar_reduction(std::string const & scalartype) : mapped_reduction(scalartype){ } }; /** @brief Mapping of a vector reduction (based on matrix-vector product) */ class mapped_vector_reduction : public mapped_reduction{ friend class map_functor; public: mapped_vector_reduction(std::string const & scalartype) : mapped_reduction(scalartype){ } }; /** @brief Mapping of a host scalar to a generator class */ class mapped_host_scalar : public mapped_object{ friend class map_functor; std::string generate_default(std::pair const &) const{ return name_; } public: mapped_host_scalar(std::string const & scalartype) : mapped_object(scalartype){ } std::string const & name() { return name_; } std::string & append_kernel_arguments(std::set & already_generated, std::string & str, unsigned int) const{ if(already_generated.insert(name_).second) str += detail::generate_value_kernel_argument(scalartype_, name_); return str; } private: std::string name_; }; /** @brief Base class for datastructures passed by pointer */ class mapped_handle : public mapped_object{ virtual std::string offset(std::pair const & index) const = 0; virtual void append_optional_arguments(std::string &) const{ } std::string generate_default(std::pair const & index) const{ return name_ + '[' + offset(index) + ']'; } public: mapped_handle(std::string const & scalartype) : mapped_object(scalartype){ } std::string const & name() const { return name_; } void fetch(std::pair const & index, unsigned int vectorization, std::set & fetched, utils::kernel_generation_stream & stream) { std::string new_access_name = name_ + "_private"; if(fetched.find(name_)==fetched.end()){ stream << scalartype_; if(vectorization > 1) stream << vectorization; stream << " " << new_access_name << " = " << generate_default(index) << ';' << std::endl; fetched.insert(name_); } access_name_ = 
new_access_name; } void write_back(std::pair const & index, std::set & fetched, utils::kernel_generation_stream & stream) { std::string old_access_name = access_name_ ; access_name_ = ""; if(fetched.find(name_)!=fetched.end()){ stream << generate_default(index) << " = " << old_access_name << ';' << std::endl; fetched.erase(name_); } } std::string & append_kernel_arguments(std::set & already_generated, std::string & str, unsigned int vector_size) const{ if(already_generated.insert(name_).second){ std::string vector_scalartype = append_vector_size(scalartype_, vector_size); str += detail::generate_pointer_kernel_argument("__global", vector_scalartype, name_); append_optional_arguments(str); } return str; } protected: std::string name_; }; /** @brief Mapping of a scalar to a generator class */ class mapped_scalar : public mapped_handle{ friend class map_functor; private: std::string offset(std::pair const &) const { return "0"; } public: mapped_scalar(std::string const & scalartype) : mapped_handle(scalartype){ } }; /** @brief Base class for mapping buffer-based objects to a generator class */ class mapped_buffer : public mapped_handle{ protected: std::string append_vector_size(std::string const & scalartype, unsigned int vector_size) const { if(vector_size>1) return scalartype + utils::to_string(vector_size); else return scalartype; } public: mapped_buffer(std::string const & scalartype) : mapped_handle(scalartype){ } virtual std::string generate(std::pair const & index, int vector_element) const{ if(vector_element>-1) return mapped_object::generate(index, vector_element)+".s"+utils::to_string(vector_element); return mapped_object::generate(index, vector_element); } }; /** @brief Mapping of a vector to a generator class */ class mapped_vector : public mapped_buffer{ friend class map_functor; std::string offset(std::pair const & index) const { if(info_.statement){ std::string str; detail::generate_all_rhs(*info_.statement, *info_.root_node, index, -1, str, 
*info_.mapping); return str; } else return index.first; } void append_optional_arguments(std::string & str) const{ if(!start_name_.empty()) str += detail::generate_value_kernel_argument("unsigned int", start_name_); if(!stride_name_.empty()) str += detail::generate_value_kernel_argument("unsigned int", stride_name_); if(!shift_name_.empty()) str += detail::generate_value_kernel_argument("unsigned int", shift_name_); } public: mapped_vector(std::string const & scalartype) : mapped_buffer(scalartype){ } private: node_info info_; std::string start_name_; std::string stride_name_; std::string shift_name_; }; /** @brief Mapping of a matrix to a generator class */ class mapped_matrix : public mapped_buffer{ friend class map_functor; void append_optional_arguments(std::string & str) const{ if(!start1_name_.empty()) str += detail::generate_value_kernel_argument("unsigned int", start1_name_); if(!stride1_name_.empty()) str += detail::generate_value_kernel_argument("unsigned int", stride1_name_); if(!start2_name_.empty()) str += detail::generate_value_kernel_argument("unsigned int", start2_name_); if(!stride2_name_.empty()) str += detail::generate_value_kernel_argument("unsigned int", stride2_name_); } public: mapped_matrix(std::string const & scalartype) : mapped_buffer(scalartype){ } bool is_row_major() const { return is_row_major_; } std::string const & size1() const { return size1_; } std::string const & size2() const { return size2_; } void bind_sizes(std::string const & size1, std::string const & size2) const{ size1_ = size1; size2_ = size2; } std::string offset(std::pair const & index) const { std::string i = index.first; std::string j = index.second; if(is_row_major_) if(j=="0") return '(' + i + ')' + '*' + size2_; else return '(' + i + ')' + '*' + size2_ + "+ (" + j + ')'; else if(i=="0") return "(" + j + ')' + '*' + size1_; else return '(' + i + ')' + "+ (" + j + ')' + '*' + size1_; } private: mutable std::string size1_; mutable std::string size2_; std::string 
start1_name_; std::string stride1_name_; std::string shift1_name_; std::string start2_name_; std::string stride2_name_; std::string shift2_name_; bool is_row_major_; }; /** @brief Mapping of a implicit vector to a generator class */ class mapped_implicit_vector : public mapped_object{ friend class map_functor; std::string value_name_; std::string index_name_; public: mapped_implicit_vector(std::string const & scalartype) : mapped_object(scalartype){ } std::string generate_default(std::pair const & /*index*/) const{ return value_name_; } std::string & append_kernel_arguments(std::set & /*already_generated*/, std::string & str, unsigned int /*vector_size*/) const{ if(!value_name_.empty()) str += detail::generate_value_kernel_argument(scalartype_, value_name_); if(!index_name_.empty()) str += detail::generate_value_kernel_argument("unsigned int", index_name_); return str; } }; /** @brief Mapping of a implicit matrix to a generator class */ class mapped_implicit_matrix : public mapped_object{ friend class map_functor; std::string value_name_; public: mapped_implicit_matrix(std::string const & scalartype) : mapped_object(scalartype){ } std::string generate_default(std::pair const & /* index */) const{ return value_name_; } std::string & append_kernel_arguments(std::set & /*already generated*/, std::string & str, unsigned int /*vector size*/) const{ if(!value_name_.empty()) str += detail::generate_value_kernel_argument(scalartype_, value_name_); return str; } }; inline std::string generate(std::pair const & index, int vector_element, mapped_object const & s){ return s.generate(index, vector_element); } static void fetch(std::pair const & index, unsigned int vectorization, std::set & fetched, utils::kernel_generation_stream & stream, mapped_object & s){ if(mapped_handle * p = dynamic_cast(&s)) p->fetch(index, vectorization, fetched, stream); } static std::string & append_kernel_arguments(std::set & already_generated, std::string & str, unsigned int vector_size, 
/** @brief Thin wrapper around a std::map.
 *
 * Used to avoid type-length explosion when std::map would otherwise be nested
 * directly: each nesting level gets its own short struct name instead.
 */
template<class KeyType, class ValueType>
struct map_wrapper{
  typedef std::map<KeyType, ValueType> map_type;

  /** @brief The wrapped map; public so callers can use find()/end() on it */
  map_type map;

  /** @brief Returns the value stored under key, inserting a default-constructed value first if the key is absent */
  ValueType & operator[](KeyType const & key){
    // insert() leaves an existing entry untouched and returns an iterator to it
    typedef typename map_type::value_type entry_type;
    return map.insert(entry_type(key, ValueType())).first->second;
  }
};
*/ struct expression_map : public map_wrapper{ }; /** @brief Represents device->expression in the map hierarchy vendor->device_type->device_arch->device->expression->profile. */ struct device_name_map : public map_wrapper{ }; /** @brief Represents device_arch->device in the map hierarchy vendor->device_type->device_arch->device->expression->profile. */ struct device_architecture_map : public map_wrapper{ }; /** @brief Represents device_type->device_arch in the map hierarchy vendor->device_type->device_arch->device->expression->profile. */ struct device_type_map : public map_wrapper{ }; /** @brief Represents vendor->device_type in the map hierarchy vendor->device_type->device_arch->device->expression->profile. */ struct database_type : public map_wrapper{ }; /** @brief Set a default of a generation to a particular device for a particular operation */ inline void set_generation_default_to(database_type & map, vendor_id_type vendor_id, viennacl::ocl::device_architecture_family family, expression_key_type expression, std::string const & device_name){ map[vendor_id][CL_DEVICE_TYPE_GPU][family][""][expression] = map[vendor_id][CL_DEVICE_TYPE_GPU][family][device_name][expression]; } /** @brief Set a default of a generation to a particular device for all operations */ inline void set_all_generation_default_to(database_type & map, vendor_id_type vendor_id, viennacl::ocl::device_architecture_family family, std::string const & device_name){ set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_SAXPY_TYPE,4),device_name); set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_SAXPY_TYPE,4),device_name); set_generation_default_to(map,vendor_id,family,std::make_pair(SCALAR_REDUCE_TYPE,4),device_name); set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_REDUCE_Nx_TYPE,4),device_name); set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_REDUCE_Tx_TYPE,4),device_name); 
set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_NN_TYPE,4),device_name); set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_TN_TYPE,4),device_name); set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_NT_TYPE,4),device_name); set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_TT_TYPE,4),device_name); set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_SAXPY_TYPE,8),device_name); set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_SAXPY_TYPE,8),device_name); set_generation_default_to(map,vendor_id,family,std::make_pair(SCALAR_REDUCE_TYPE,8),device_name); set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_REDUCE_Nx_TYPE,8),device_name); set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_REDUCE_Tx_TYPE,8),device_name); set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_NN_TYPE,8),device_name); set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_TN_TYPE,8),device_name); set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_NT_TYPE,8),device_name); set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_TT_TYPE,8),device_name); } /** @brief Initialize the database */ static database_type init_database(){ database_type map; /*---------------------------*/ /* GPU Defaults */ /*---------------------------*/ map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(1,128,128,true)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(1, 128, 
128, true)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1, 1, 256, 32)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1, 1, 256, 32)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(1,128,128,true)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(1, 128, 128, true)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1, 1, 256, 32)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1, 1, 256, 32)); 
map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0)); /*---------------------------*/ /* CPU Defaults */ /*---------------------------*/ map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(8,16,256,true)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(8,8,512,true)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,2,1,8)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,16,8,8)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new 
matrix_product(1,16,64,1,8,4,32,0,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(8,16,32,true)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(8,8,512,true)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,1,8)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,8,16,16)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0)); 
/*---------------------------*/ /* ACCELERATOR Defaults */ /*---------------------------*/ //same as CPU for now map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(8,16,256,true)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(8,8,512,true)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,2,1,8)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,16,8,8)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(8,16,32,true)); 
map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(8,8,512,true)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,1,8)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,8,16,16)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0)); map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0)); /*---------------------------*/ /* AMD */ /*---------------------------*/ //Evergreen //Cypress map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(1,4,64,true)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); 
map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(8,128,128,true)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,1,256,1024)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,32,8,256)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(4,4,64,16,4,4,8,1,0)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(4,4,64,16,4,4,8,1,0)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(4,8,64,16,4,4,8,1,0)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(4,8,128,8,8,4,4,0,0)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(2,1,64,true)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(2,256,64,true)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,256,1024)); 
map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,64,4,256)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(2,16,32,16,2,2,8,0,0)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(2,4,64,32,4,2,2,0,0)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(4,2,64,32,8,8,4,0,0)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(2,16,64,8,2,2,4,0,0)); //Default set_all_generation_default_to(map,viennacl::ocl::amd_id,viennacl::ocl::Evergreen,"Cypress"); //Southern Islands map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(1,4,64,true)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(8,128,128,true)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,1,256,1024)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,32,8,256)); 
map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(4,8,128,32,4,4,4,1,0)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,64,16,4,2,8,1,0)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(4,16,64,16,4,4,8,1,0)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(4,16,64,16,4,4,8,1,0)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(2,1,64,true)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(2,256,64,true)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,256,1024)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,64,4,256)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(2,4,128,64,4,2,2,1,0)); 
map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(2,2,128,32,4,2,2,0,0)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(2,8,128,32,2,2,2,1,0)); map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(2,8,128,32,2,2,2,1,0)); //Default set_all_generation_default_to(map,viennacl::ocl::amd_id,viennacl::ocl::SouthernIslands,"Tahiti"); /*---------------------------*/ /* NVidia */ /*---------------------------*/ //-----Fermi //Geforce GTX 470 map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(1,1,256,true)); map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(4,64,512,true)); map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,1,256,1024)); map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,64,4,64)); map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(1,2,64,64,8,4,2,1,0)); map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 
470"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,16,4,4,8,0,0)); map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(1,4,128,32,4,8,4,1,0)); map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(1,4,32,16,8,4,8,0,0)); map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(2,1,64,true)); map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(2,16,16,16,16,true)); map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(2,64,512,true)); map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,128,1024)); map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,16,32,1024)); map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,64,32,2,2,8,1,0)); map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(1,64,128,4,2,2,8,0,1)); map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new 
matrix_product(1,4,128,32,4,8,4,1,0)); map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(1,4,32,16,8,4,8,0,0)); //default set_all_generation_default_to(map,viennacl::ocl::nvidia_id,viennacl::ocl::Fermi,"GeForce GTX 470"); return map; } static database_type database = init_database(); /** @brief If the fallback is too harsh, use a very conservative profile */ static profile_base * handle_failure(viennacl::ocl::device const & device, expression_descriptor const & descriptor, tools::shared_ptr const & profile){ //Returns default if the profile is invalid if(profile->is_invalid(device, descriptor.scalartype_size)) return at(at(at(at(at(database.map, viennacl::ocl::unknown_id).map, device.type()).map, viennacl::ocl::UNKNOWN).map, std::string("")).map, descriptor.make_key()).get(); return profile.get(); } /** @brief Get the profile for a device and a descriptor */ static profile_base * get(viennacl::ocl::device const & device, expression_descriptor const & descriptor){ device_type dev_type = device.type(); vendor_id_type vendor_id = device.vendor_id(); viennacl::ocl::device_architecture_family device_architecture = device.architecture_family(); std::string const & device_name = device.name(); expression_key_type expression_key = descriptor.make_key(); //std::cout << "Looking up vendor ID..." << std::endl; /*-Vendor ID-*/ database_type::map_type::iterator vendor_it = database.map.find(vendor_id); //Vendor not recognized => global default: if(vendor_it==database.map.end()) return handle_failure(device, descriptor, at(at(at(at(at(database.map, viennacl::ocl::unknown_id).map, dev_type).map, viennacl::ocl::UNKNOWN).map, std::string("")).map, expression_key)); /*-Device Type-*/ //std::cout << "Looking up device type..." 
<< std::endl; device_type_map::map_type::iterator device_type_it = vendor_it->second.map.find(dev_type); //Device type not recognized for this vendor => global default if(device_type_it==vendor_it->second.map.end()) return handle_failure(device, descriptor, at(at(at(at(at(database.map, viennacl::ocl::unknown_id).map, dev_type).map, viennacl::ocl::UNKNOWN).map, std::string("")).map, expression_key)); /*-Device Architecture-*/ //std::cout << "Looking up device architecture..." << std::endl; device_architecture_map::map_type::iterator architecture_it = device_type_it->second.map.find(device_architecture); if(architecture_it==device_type_it->second.map.end()) return handle_failure(device, descriptor, at(at(at(at(at(database.map, viennacl::ocl::unknown_id).map, dev_type).map, viennacl::ocl::UNKNOWN).map, std::string("")).map, expression_key)); /*-Device Name-*/ //std::cout << "Looking up device name..." << std::endl; device_name_map::map_type::iterator device_name_it = architecture_it->second.map.find(device_name); //Name not found => Vendor default if(device_name_it==architecture_it->second.map.end()) return handle_failure(device, descriptor, at(at(at(at(at(database.map, vendor_id).map, dev_type).map, device_architecture).map, std::string("")).map, expression_key)); //std::cout << "Looking up expression name.." << std::endl; /*-Expression-*/ expression_map::map_type::iterator expression_it = device_name_it->second.map.find(expression_key); //Expression not found => Vendor default if(expression_it==device_name_it->second.map.end()) return handle_failure(device, descriptor, at(at(at(at(at(database.map, vendor_id).map, dev_type).map, device_architecture).map, std::string("")).map, expression_key)); //std::cout << "Device found in the database! Getting profile..." << std::endl; //Everything okay. 
Return specific profile// return handle_failure(device, descriptor, at(at(at(at(at(database.map, vendor_id).map, dev_type).map, device_architecture).map, std::string("")).map, expression_key)); } } } } #endif ViennaCL-1.5.1-src/viennacl/generator/map_functor.hpp000644 001750 001750 00000016724 12267307531 022613 0ustar00rupprupp000000 000000 #ifndef VIENNACL_GENERATOR_MAP_GENERATE_PROTOTYPE_HPP #define VIENNACL_GENERATOR_MAP_GENERATE_PROTOTYPE_HPP /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/generator/map_functor.hpp @brief Functor to map the statements to the types defined in mapped_objects.hpp */ #include #include "viennacl/forwards.h" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/scheduler/forwards.h" #include "viennacl/generator/forwards.h" #include "viennacl/tools/shared_ptr.hpp" #include "viennacl/generator/helpers.hpp" #include "viennacl/generator/utils.hpp" #include "viennacl/generator/mapped_objects.hpp" namespace viennacl{ namespace generator{ namespace detail{ /** @brief Functor to map the statements to the types defined in mapped_objects.hpp */ class map_functor : public traversal_functor{ std::string create_name(unsigned int & current_arg, std::map & memory, void * handle) const{ if(handle==NULL) return "arg" + utils::to_string(current_arg_++); if(memory.insert(std::make_pair(handle, current_arg)).second) return "arg" + utils::to_string(current_arg_++); else return 
"arg" + utils::to_string(memory[handle]); } public: typedef container_ptr_type result_type; map_functor(std::map & memory, unsigned int & current_arg, mapping_type & mapping) : memory_(memory), current_arg_(current_arg), mapping_(mapping){ } /** @brief Binary leaf */ template result_type binary_leaf(viennacl::scheduler::statement const * statement, viennacl::scheduler::statement_node const * root_node, mapping_type const * mapping) const { T * p = new T("float"); p->info_.statement = statement; p->info_.root_node = root_node; p->info_.mapping = mapping; return container_ptr_type(p); } template result_type operator()(ScalarType const & /*scal*/) const { mapped_host_scalar * p = new mapped_host_scalar(utils::type_to_string::value()); p->name_ = create_name(current_arg_, memory_, NULL); return container_ptr_type(p); } /** @brief Scalar mapping */ template result_type operator()(scalar const & scal) const { mapped_scalar * p = new mapped_scalar(utils::type_to_string::value()); p->name_ = create_name(current_arg_, memory_, (void*)&scal); return container_ptr_type(p); } /** @brief Vector mapping */ template result_type operator()(vector_base const & vec) const { mapped_vector * p = new mapped_vector(utils::type_to_string::value()); p->name_ = create_name(current_arg_, memory_, (void*)&vec); if(vec.start() > 0) p->start_name_ = p->name_ +"_start"; if(vec.stride() > 1) p->stride_name_ = p->name_ + "_stride"; return container_ptr_type(p); } /** @brief Implicit vector mapping */ template result_type operator()(implicit_vector_base const & vec) const { mapped_implicit_vector * p = new mapped_implicit_vector(utils::type_to_string::value()); if(vec.is_value_static()==false) p->value_name_ = create_name(current_arg_, memory_, NULL); if(vec.has_index()) p->value_name_ = create_name(current_arg_, memory_, NULL); return container_ptr_type(p); } /** @brief Matrix mapping */ template result_type operator()(matrix_base const & mat) const { mapped_matrix * p = new 
mapped_matrix(utils::type_to_string::value()); p->name_ = create_name(current_arg_, memory_, (void*)&mat); p->is_row_major_ = static_cast(utils::is_same_type::value); if(mat.start1() > 0) p->start1_name_ = p->name_ +"_start1"; if(mat.stride1() > 1) p->stride1_name_ = p->name_ + "_stride1"; if(mat.start2() > 0) p->start2_name_ = p->name_ +"_start2"; if(mat.stride2() > 1) p->stride2_name_ = p->name_ + "_stride2"; return container_ptr_type(p); } /** @brief Implicit matrix mapping */ template result_type operator()(implicit_matrix_base const & mat) const { mapped_implicit_matrix * p = new mapped_implicit_matrix(utils::type_to_string::value()); if(mat.is_value_static()==false) p->value_name_ = create_name(current_arg_, memory_, NULL); return container_ptr_type(p); } /** @brief Traversal functor */ void operator()(viennacl::scheduler::statement const * statement, viennacl::scheduler::statement_node const * root_node, detail::node_type node_type) const { const key_type key(root_node, node_type); if(node_type == LHS_NODE_TYPE && root_node->lhs.type_family != viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) mapping_.insert(mapping_type::value_type(key, utils::call_on_element(root_node->lhs, *this))); else if(node_type == RHS_NODE_TYPE && root_node->rhs.type_family != viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) mapping_.insert(mapping_type::value_type(key, utils::call_on_element(root_node->rhs, *this))); else if( node_type== PARENT_NODE_TYPE){ viennacl::scheduler::operation_node_type op_type = root_node->op.type; if(op_type == viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE) mapping_.insert(mapping_type::value_type(key, binary_leaf(statement, root_node, &mapping_))); else if(op_type == viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE) mapping_.insert(mapping_type::value_type(key, binary_leaf(statement, root_node, &mapping_))); else if(op_type == viennacl::scheduler::OPERATION_BINARY_MAT_MAT_PROD_TYPE) mapping_.insert(mapping_type::value_type(key, 
binary_leaf(statement, root_node, &mapping_))); } } private: std::map & memory_; unsigned int & current_arg_; mapping_type & mapping_; }; } } } #endif ViennaCL-1.5.1-src/viennacl/generator/matrix_product.hpp000644 001750 001750 00000072770 12267307531 023345 0ustar00rupprupp000000 000000 #ifndef VIENNACL_GENERATOR_GENERATE_MATRIX_PRODUCT_HPP #define VIENNACL_GENERATOR_GENERATE_MATRIX_PRODUCT_HPP /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/generator/matrix_product.hpp * * @brief Kernel template for the matrix product operation */ #include #include "viennacl/scheduler/forwards.h" #include "viennacl/generator/profile_base.hpp" #include "viennacl/generator/mapped_objects.hpp" #include "viennacl/generator/utils.hpp" #include "viennacl/forwards.h" #include "viennacl/tools/tools.hpp" namespace viennacl{ namespace generator{ /** @brief Kernel generation class for matrix-matrix products. 
/** @brief Returns the CSV column header for this profile.
*
* One column per field written by csv_representation(), in the same order:
* vectorization, local size 1, cache width, local size 2, ms, ks, ns,
* use_lhs_shared, use_rhs_shared.
*/
static std::string csv_format() {
  return "Vec,LSize1,CacheWidth,LSize2,mS,kS,nS,UseLHSShared,UseRHSShared";
}
oss.str(); } void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type const & statements, viennacl::ocl::kernel & k, unsigned int & n_arg) const { //set M, N scheduler::statement_node const & first_node = statements.front().second; vcl_size_t M = utils::call_on_matrix(first_node.lhs, utils::internal_size1_fun()); vcl_size_t N = utils::call_on_matrix(first_node.lhs, utils::internal_size2_fun()); //set ND range configure_local_sizes(k, kernel_id); k.global_work_size(0, M/ms_); k.global_work_size(1, N/ns_); //set arguments //M,N k.arg(n_arg++, cl_uint(M)); k.arg(n_arg++, cl_uint(N)); //K for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ scheduler::statement::container_type exprs = it->first.array(); for(scheduler::statement::container_type::iterator iit = exprs.begin() ; iit != exprs.end() ; ++iit){ if(iit->op.type==scheduler::OPERATION_BINARY_MAT_MAT_PROD_TYPE){ scheduler::statement_node const * current_node = &(*iit); //The LHS of the prod is a matrix if(current_node->lhs.type_family==scheduler::MATRIX_TYPE_FAMILY) { k.arg(n_arg++, cl_uint(utils::call_on_matrix(current_node->lhs, utils::internal_size2_fun()))); } else{ //The LHS of the prod is a matrix expression current_node = &exprs[current_node->lhs.node_index]; if(current_node->lhs.type_family==scheduler::MATRIX_TYPE_FAMILY) { if(current_node->op.type==scheduler::OPERATION_UNARY_TRANS_TYPE) k.arg(n_arg++, cl_uint(utils::call_on_matrix(current_node->lhs, utils::internal_size1_fun()))); else k.arg(n_arg++, cl_uint(utils::call_on_matrix(current_node->lhs, utils::internal_size2_fun()))); } else{ assert(false && bool("unexpected expression tree")); } } return; } } } } static std::string size1() { return "M"; } static std::string size2() { return "K"; } static std::string size3() { return "N"; } void kernel_arguments(statements_type const & /*statements*/, std::string & arguments_string) const{ arguments_string += 
detail::generate_value_kernel_argument("unsigned int", "M"); arguments_string += detail::generate_value_kernel_argument("unsigned int", "N"); arguments_string += detail::generate_value_kernel_argument("unsigned int", "K"); } private: void transform_block(detail::mapped_matrix const & /*mat_infos*/, bool store_shared , unsigned int & large_block_1, unsigned int & large_block_2 , unsigned int & small_block_1, unsigned int & small_block_2 , access_flow flow) const { if(flow==REGULAR){ large_block_2/=vector_size_; if(!store_shared) small_block_2/=vector_size_; } else{ large_block_1/=vector_size_; if(!store_shared) small_block_1/=vector_size_; } } std::string helper_variable(utils::kernel_generation_stream & stream , bool store_in_register , std::string const & type , std::string const & name , std::string const & expr) const { if(!store_in_register) return expr; stream << type << " " << name << " = " << expr << ";" << std::endl; return name; } void fetch_element_to_local_mem(utils::kernel_generation_stream & stream, std::string const & lmem_name, vcl_size_t lmem_size2, std::string const & global_ptr, detail::mapped_matrix const & mat, access_flow flow, std::string const & i, std::string const & j) const { if(flow==REGULAR){ stream << "val = *(" << global_ptr << " + " << j << " + " << mat.size2() << "*" << i << ");" << std::endl; for(unsigned int a = 0 ; a < vector_size_ ; ++a) if(vector_size_>1) stream << lmem_name << "[" << i << "*" << lmem_size2 << " + " << j << "*" << vector_size_<<" + " << a << "] = val.s" << a << ";" <1) stream << lmem_name << "[" << i << "*" << vector_size_*lmem_size2 << " + " << j << " + " << a*lmem_size2 << "] = val.s" << a << ";" < 1) aligned_scalartype+=utils::to_string(vector_size_); stream << "barrier(CLK_LOCAL_MEM_FENCE);" << std::endl; stream << "{" << std::endl; stream << aligned_scalartype << " val;" << std::endl; //Can unroll if(bound2%local_size2_==0 && bound1%local_size1_==0){ for(unsigned int j = 0 ; j < bound2 ; 
j+=static_cast(local_size2_)){ for(unsigned int i = 0 ; i < bound1 ; i+=static_cast(local_size1_)){ std::string indi = "(get_local_id(0) + " + utils::to_string(i)+")"; std::string indj = "(get_local_id(1) + " + utils::to_string(j)+")"; fetch_element_to_local_mem(stream,lmem_name,lmem_size2,global_ptr,mat,flow,indi,indj); } } } else{ stream << "for(unsigned int j = get_local_id(1)" << " ; j < " << bound2 << "; j+= " << local_size2_ << "){" << std::endl; stream.inc_tab(); stream << "for(unsigned int i = get_local_id(0)" << " ; i < " << bound1 << "; i+= " << local_size1_ << "){" << std::endl; stream.inc_tab(); fetch_element_to_local_mem(stream,lmem_name,lmem_size2,global_ptr,mat,flow,"i","j"); stream.dec_tab(); stream << "}" << std::endl; stream.dec_tab(); stream << "}" << std::endl; } stream << "}" << std::endl; stream << "barrier(CLK_LOCAL_MEM_FENCE);" << std::endl; } void core(vcl_size_t /*kernel_id*/, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector const & mapping) const { ////////////////// /// INIT /// ////////////// detail::mapped_matrix const * assigned = static_cast(at(mapping.at(0), std::make_pair(&statements.front().second,detail::LHS_NODE_TYPE)).get()); detail::mapped_matrix_product* prod = NULL; detail::mapped_matrix const * lhs = NULL; detail::mapped_matrix const * rhs = NULL; bool is_lhs_transposed = false; bool is_rhs_transposed = false; for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ scheduler::statement::container_type const & exprs = it->first.array(); vcl_size_t i = std::distance(statements.begin(), it); for(scheduler::statement::container_type::const_iterator iit = exprs.begin() ; iit != exprs.end() ; ++iit){ if(iit->op.type==scheduler::OPERATION_BINARY_MAT_MAT_PROD_TYPE){ prod = (detail::mapped_matrix_product *)at(mapping.at(i), std::make_pair(&(*iit), detail::PARENT_NODE_TYPE)).get(); if(iit->lhs.type_family == scheduler::COMPOSITE_OPERATION_FAMILY){ 
is_lhs_transposed = true; lhs = (detail::mapped_matrix const *)at(mapping.at(i), std::make_pair(&exprs[iit->lhs.node_index],detail::LHS_NODE_TYPE)).get(); } else{ is_lhs_transposed = false; lhs = (detail::mapped_matrix const *)at(mapping.at(i), std::make_pair(&(*iit), detail::LHS_NODE_TYPE)).get(); } if(iit->rhs.type_family == scheduler::COMPOSITE_OPERATION_FAMILY){ is_rhs_transposed = true; rhs = (detail::mapped_matrix const *)at(mapping.at(i), std::make_pair(&exprs[iit->rhs.node_index], detail::LHS_NODE_TYPE)).get(); } else{ is_rhs_transposed = false; rhs = (detail::mapped_matrix const *)at(mapping.at(i), std::make_pair(&(*iit),detail::RHS_NODE_TYPE)).get(); } } } } if(vector_size_>1){ std::string StrV = "/"+utils::to_string(vector_size_) ; for(detail::mapping_type::const_iterator it = mapping.front().begin() ; it != mapping.front().end() ; ++it){ if(detail::mapped_matrix const * p = dynamic_cast(it->second.get())){ if(p->is_row_major()) p->bind_sizes("M", "N"+StrV); else p->bind_sizes("M"+StrV, "N"); } } if(lhs->is_row_major()) if(is_lhs_transposed) lhs->bind_sizes("M"+StrV, "K"); else lhs->bind_sizes("M", "K"+StrV); else if(is_lhs_transposed) lhs->bind_sizes("M", "K"+StrV); else lhs->bind_sizes("M"+StrV, "K"); if(rhs->is_row_major()) if(is_rhs_transposed) rhs->bind_sizes("K"+StrV, "N"); else rhs->bind_sizes("K", "N"+StrV); else if(is_rhs_transposed) rhs->bind_sizes("K", "N"+StrV); else rhs->bind_sizes("K"+StrV, "N"); } else{ for(detail::mapping_type::const_iterator it = mapping.front().begin() ; it != mapping.front().end() ; ++it){ if(detail::mapped_matrix const * p = dynamic_cast(it->second.get())){ p->bind_sizes("M", "N"); } } lhs->bind_sizes("M", "K"); rhs->bind_sizes("K", "N"); } std::string aligned_scalartype = assigned->scalartype(); if(vector_size_ > 1) aligned_scalartype+=utils::to_string(vector_size_); access_flow result_access_flow; if(assigned->is_row_major()) result_access_flow = REGULAR; else result_access_flow = STRIDED; access_flow 
lhs_access_flow; if((lhs->is_row_major() && !is_lhs_transposed) ||(!lhs->is_row_major() && is_lhs_transposed)) lhs_access_flow = REGULAR; else lhs_access_flow = STRIDED; access_flow rhs_access_flow; if((rhs->is_row_major() && !is_rhs_transposed) ||(!rhs->is_row_major() && is_rhs_transposed)) rhs_access_flow = REGULAR; else rhs_access_flow = STRIDED; std::string lhs_value_scalartype; if(use_lhs_shared_) lhs_value_scalartype = lhs->scalartype(); else lhs_value_scalartype = aligned_scalartype; std::string rhs_value_scalartype; if(use_rhs_shared_) rhs_value_scalartype = rhs->scalartype(); else rhs_value_scalartype = aligned_scalartype; unsigned int ml_res = static_cast(ml_), nl_res = static_cast(nl_), ms_res = static_cast(ms_), ns_res = static_cast(ns_); unsigned int ml_lhs = static_cast(ml_), cache_width_lhs = static_cast(cache_width_), ms_lhs = static_cast(ms_), ks_lhs = static_cast(ks_); unsigned int cache_width_rhs = static_cast(cache_width_), nl_rhs = static_cast(nl_), ks_rhs = static_cast(ks_), ns_rhs = static_cast(ns_); transform_block(*assigned,false,ml_res,nl_res,ms_res,ns_res,result_access_flow); transform_block(*lhs,use_lhs_shared_,ml_lhs,cache_width_lhs,ms_lhs,ks_lhs,lhs_access_flow); transform_block(*rhs,use_rhs_shared_,cache_width_rhs,nl_rhs,ks_rhs,ns_rhs,rhs_access_flow); ////////////////// /// DECLARATIONS /// ////////////// vcl_size_t local_lhs_size1 = ml_ ; vcl_size_t local_lhs_size2 = cache_width_ + 1; vcl_size_t local_rhs_size1 = cache_width_; vcl_size_t local_rhs_size2 = nl_ + 1; ///Result Values for(unsigned int m=0; m< ms_res; ++m) for(unsigned int n=0; n < ns_res ; ++n) stream << aligned_scalartype << " " << "res" << m << "_" << n << " = (" << aligned_scalartype << ")(0) ;" << std::endl; ///Local memory if(use_lhs_shared_) stream << "__local " << lhs->scalartype() << " lhs_buf[" << local_lhs_size1*local_lhs_size2 << "]" << ";" << std::endl; if(use_rhs_shared_) stream << "__local " << rhs->scalartype() << " rhs_buf[" << 
local_rhs_size1*local_rhs_size2 << "]" << ";" << std::endl; ///Pointer to result //stream << "__global " << aligned_scalartype << "* res_ptr = " << assigned->name() << " + " << assigned->offset(std::make_pair("get_global_id(0)*" + utils::to_string(ms_res), "get_global_id(1)*" + utils::to_string(ns_res))) << ";" << std::endl; ///LHS - Local Memory Offset if(use_lhs_shared_){ std::string i = "get_group_id(0)*" + utils::to_string(ml_lhs); stream << "__global " << aligned_scalartype << "* global_lhs_ptr = " << lhs->name() << " + "; if(lhs_access_flow==REGULAR) stream << "(" << i << ")" << "*" << lhs->size2(); else stream << i; stream << ";" << std::endl; } ///LHS - Global Memory pointer else{ if(lhs_access_flow==REGULAR) for(unsigned int m=0; msize2() << "* (" << "get_group_id(0)*" << ml_lhs << "+" << "get_local_id(0)*" << ms_lhs << "+" << m << " );" << std::endl; else for(unsigned int k=0; ksize1() << ")*" << k << "+ " << "get_group_id(0)*" << ml_lhs << "+" << "get_local_id(0)*" << ms_lhs << ";" << std::endl; } ///RHS - Local Memory Offset if(use_rhs_shared_){ std::string j = "get_group_id(1)*" + utils::to_string(nl_rhs); stream << "__global " << aligned_scalartype << "* global_rhs_ptr = " << rhs->name() << " + "; if(rhs_access_flow==REGULAR) stream << j; else stream << "(" << j << ")" << "*" << rhs->size1(); stream << ";" << std::endl; } ///RHS - Global Memory Pointer else{ if(rhs_access_flow==REGULAR) for(unsigned int k = 0 ; k < ks_rhs ; ++k) stream << "__global " << aligned_scalartype << "* " << "rhs_ptr_" << k << " = " << rhs->name() << " + " << "(" << k << ")" << "*" << rhs->size2() << " + " << "get_local_id(1)*" << ns_rhs << " + get_group_id(1)*" << nl_rhs << ";" << std::endl; else for(unsigned int n = 0 ; n < ns_rhs ; ++n) stream << "__global " << aligned_scalartype << "* " << "rhs_ptr_" << n << " = " << rhs->name() << " + " << "(" << "get_local_id(1)*" << ns_rhs << " + get_group_id(1)*" << nl_rhs << " + " << n << ")" << "*" << rhs->size1() << ";" << 
std::endl; } ///Large Work-group Wise loop std::string block_num = helper_variable(stream,false,"unsigned int", "block_num", "K/" + utils::to_string(cache_width_)); stream << "for(unsigned int bl=0 ; bl<" << block_num << " ; ++bl){" << std::endl; stream.inc_tab(); ///Update LHS Local Memory and pointers (if necessary) if(use_lhs_shared_){ fetch_to_local_mem(stream,"lhs_buf",local_lhs_size2,"global_lhs_ptr",ml_lhs,cache_width_lhs,*lhs,lhs_access_flow); for(unsigned int m=0; m1) res_oss << ".s" << a; lhs_oss << "val_lhs_" << ind_lhs_1 << "_" << ind_lhs_2; if(!use_lhs_shared_ && vector_size_>1) lhs_oss << ".s" << ind_s_lhs; rhs_oss << "val_rhs_" << ind_rhs_1 << "_" << ind_rhs_2; if(!use_rhs_shared_ && vector_size_>1) rhs_oss << ".s" << ind_s_rhs; stream << res_oss.str() << "+=" << lhs_oss.str() << "*" << rhs_oss.str() << ";" << std::endl; } } } } if(use_rhs_shared_){ for(unsigned int k=0 ; ksize2() << " - " << ns_rhs << ";" << std::endl; } if(!use_lhs_shared_){ if(lhs_access_flow==STRIDED) for(unsigned int k=0 ; ksize1() << " - " << ms_lhs << ";" << std::endl; } stream.dec_tab(); stream << "}" << std::endl; if(use_lhs_shared_){ if(lhs_access_flow==REGULAR) stream << "global_lhs_ptr += " << cache_width_lhs << ";" << std::endl; else stream << "global_lhs_ptr += " << cache_width_lhs << "*" << lhs->size1() << ";" << std::endl; } if(use_rhs_shared_){ if(rhs_access_flow==REGULAR) stream << "global_rhs_ptr += " << cache_width_rhs << "*" << rhs->size2() << ";" << std::endl; else stream << "global_rhs_ptr += " << cache_width_rhs << ";" << std::endl; } stream.dec_tab(); stream << "}" << std::endl; for(unsigned int m=0 ; m < ms_res ; ++m){ for(unsigned int n=0 ; n < ns_res ; ++n){ std::string i = "get_global_id(0)*" + utils::to_string(ms_res) + "+" + utils::to_string(m); std::string j = "get_global_id(1)*" + utils::to_string(ns_res) + "+" + utils::to_string(n); prod->access_name("res"+utils::to_string(m)+"_"+utils::to_string(n)); std::string str; 
detail::traverse(statements.front().first, statements.front().second, detail::expression_generation_traversal(std::make_pair(i, j), -1, str, mapping[0]), false); stream << str << ";" << std::endl; } } } private: vcl_size_t local_size1_; vcl_size_t local_size2_; vcl_size_t cache_width_; vcl_size_t ml_; vcl_size_t nl_; vcl_size_t ms_; vcl_size_t ks_; vcl_size_t ns_; bool use_lhs_shared_; bool use_rhs_shared_; }; } } #endif ViennaCL-1.5.1-src/viennacl/generator/scalar_reduction.hpp000644 001750 001750 00000037074 12267307531 023620 0ustar00rupprupp000000 000000 #ifndef VIENNACL_GENERATOR_GENERATE_SCALAR_REDUCTION_HPP #define VIENNACL_GENERATOR_GENERATE_SCALAR_REDUCTION_HPP /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/generator/scalar_reduction.hpp * * @brief Kernel template for the scalar reduction operation */ #include #include "viennacl/backend/opencl.hpp" #include "viennacl/scheduler/forwards.h" #include "viennacl/generator/helpers.hpp" #include "viennacl/generator/utils.hpp" #include "viennacl/generator/profile_base.hpp" #include "viennacl/tools/tools.hpp" namespace viennacl{ namespace generator{ /** @brief OpenCL kernel generation template for scalar reduction operations such as s = norm_2(x). 
*/ class scalar_reduction : public profile_base{ private: typedef std::vector > > temporaries_type; static void fill_scalartypes(statements_type statements, std::vector & res){ res.reserve(statements.size()); for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ if (it->second.lhs.type_family == scheduler::SCALAR_TYPE_FAMILY) { switch(it->second.lhs.numeric_type){ case scheduler::FLOAT_TYPE: res.push_back("float"); break; case scheduler::DOUBLE_TYPE: res.push_back("double"); break; default: res.push_back(""); break; } } else { res.push_back(""); } } } public: vcl_size_t lmem_used(vcl_size_t scalartype_size) const { return local_size_1_*scalartype_size; } void init_temporaries(statements_type const & statements) const { if(temporaries_.empty()){ //set temporary buffer argument for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ scheduler::statement::container_type const & array = it->first.array(); vcl_size_t size_of_scalartype; const char * scalartype_name; if (array[0].lhs.type_family != scheduler::SCALAR_TYPE_FAMILY) throw "not implemented"; switch(array[0].lhs.numeric_type){ case scheduler::FLOAT_TYPE: scalartype_name = "float"; size_of_scalartype = sizeof(float); break; case scheduler::DOUBLE_TYPE: scalartype_name = "double"; size_of_scalartype = sizeof(double); break; default: throw "not implemented"; } for(scheduler::statement::container_type::const_iterator iit = array.begin() ; iit != array.end() ; ++iit){ if(iit->op.type==scheduler::OPERATION_BINARY_INNER_PROD_TYPE){ temporaries_.push_back(std::make_pair(scalartype_name, viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, static_cast(num_groups_*size_of_scalartype)))); } } } } } void set_size_argument(viennacl::scheduler::statement const & s, viennacl::scheduler::statement_node const & /*root_node*/, unsigned int & n_arg, viennacl::ocl::kernel & k) const { scheduler::statement::container_type exprs = s.array(); 
for(scheduler::statement::container_type::iterator it = exprs.begin() ; it != exprs.end() ; ++it){ if(it->op.type==scheduler::OPERATION_BINARY_INNER_PROD_TYPE){ //set size argument scheduler::statement_node const * current_node = &(*it); vcl_size_t vector_size = 0; //The LHS of the prod is a vector if(current_node->lhs.type_family==scheduler::VECTOR_TYPE_FAMILY) { vector_size = utils::call_on_vector(current_node->lhs, utils::internal_size_fun()); } else{ //The LHS of the prod is a vector expression current_node = &exprs[current_node->lhs.node_index]; if(current_node->lhs.type_family==scheduler::VECTOR_TYPE_FAMILY) { vector_size = cl_uint(utils::call_on_vector(current_node->lhs, utils::internal_size_fun())); } else if(current_node->rhs.type_family==scheduler::VECTOR_TYPE_FAMILY) { vector_size = cl_uint(utils::call_on_vector(current_node->lhs, utils::internal_size_fun())); } else{ assert(false && bool("unexpected expression tree")); } } k.arg(n_arg++, cl_uint(vector_size/vector_size_)); } } } public: /** @brief The user constructor */ scalar_reduction(unsigned int vectorization, unsigned int local_size, unsigned int num_groups, unsigned int decomposition) : profile_base(vectorization, local_size, 1, 2), num_groups_(num_groups), decomposition_(decomposition){ } static std::string csv_format() { return "Vec,LSize,NumGroups,GlobalDecomposition"; } std::string csv_representation() const{ std::ostringstream oss; oss << vector_size_ << "," << local_size_1_ << "," << num_groups_ << "," << decomposition_; return oss.str(); } unsigned int num_groups() const { return num_groups_; } unsigned int decomposition() const { return decomposition_; } void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type const & statements, viennacl::ocl::kernel & k, unsigned int & n_arg) const{ //create temporaries init_temporaries(statements); //configure ND range if(kernel_id==0){ configure_local_sizes(k, 0); vcl_size_t gsize = local_size_1_*num_groups_; 
k.global_work_size(0,gsize); k.global_work_size(1,1); } else{ configure_local_sizes(k, 1); k.global_work_size(0,local_size_1_); k.global_work_size(1,1); } //set arguments set_size_argument(statements.front().first, statements.front().second, n_arg, k); for(temporaries_type::iterator it = temporaries_.begin() ; it != temporaries_.end() ; ++it){ k.arg(n_arg++, it->second); } } void kernel_arguments(statements_type const & statements, std::string & arguments_string) const{ init_temporaries(statements); arguments_string += detail::generate_value_kernel_argument("unsigned int", "N"); for(temporaries_type::iterator it = temporaries_.begin() ; it != temporaries_.end() ; ++it){ arguments_string += detail::generate_pointer_kernel_argument("__global", it->first, "temp" + utils::to_string(std::distance(temporaries_.begin(), it))); } } private: void core_0(utils::kernel_generation_stream& stream, std::vector exprs, std::vector const & scalartypes, statements_type const & /*statements*/, std::vector const & /*mapping*/) const { stream << "unsigned int lid = get_local_id(0);" << std::endl; for(vcl_size_t k = 0 ; k < exprs.size() ; ++k) stream << scalartypes[k] << " sum" << k << " = 0;" << std::endl; if(decomposition_){ stream << "for(unsigned int i = get_global_id(0) ; i < N ; i += get_global_size(0)){" << std::endl; } else{ stream << "unsigned int chunk_size = (N + get_num_groups(0)-1)/get_num_groups(0);" << std::endl; stream << "unsigned int chunk_start = get_group_id(0)*chunk_size;" << std::endl; stream << "unsigned int chunk_end = min(chunk_start+chunk_size, N);" << std::endl; stream << "for(unsigned int i = chunk_start + get_local_id(0) ; i < chunk_end ; i += get_local_size(0)){" << std::endl; } stream.inc_tab(); //Fetch vector entry std::set fetched; for(std::vector::iterator it = exprs.begin() ; it != exprs.end() ; ++it){ viennacl::scheduler::statement const & statement = (*it)->statement(); viennacl::scheduler::statement_node const & root_node = (*it)->root_node(); 
detail::fetch_all_lhs(fetched,statement,root_node, std::make_pair("i", "0"),vector_size_,stream,(*it)->mapping()); detail::fetch_all_rhs(fetched,statement,root_node, std::make_pair("i", "0"),vector_size_,stream,(*it)->mapping()); } //Update sums; for(std::vector::iterator it = exprs.begin() ; it != exprs.end() ; ++it){ viennacl::scheduler::statement const & statement = (*it)->statement(); viennacl::scheduler::statement_node const & root_node = (*it)->root_node(); if(vector_size_ > 1){ for(unsigned int a = 0 ; a < vector_size_ ; ++a){ std::string str; detail::generate_all_lhs(statement,root_node,std::make_pair("i","0"),a,str,(*it)->mapping()); str += "*"; detail::generate_all_rhs(statement,root_node,std::make_pair("i","0"),a,str,(*it)->mapping()); stream << " sum" << std::distance(exprs.begin(),it) << " += " << str << ";" << std::endl; } } else{ std::string str; detail::generate_all_lhs(statement,root_node,std::make_pair("i","0"),-1,str,(*it)->mapping()); str += "*"; detail::generate_all_rhs(statement,root_node,std::make_pair("i","0"),-1,str,(*it)->mapping()); stream << " sum" << std::distance(exprs.begin(),it) << " += " << str << ";" << std::endl; } } stream.dec_tab(); stream << "}" << std::endl; //Declare and fill local memory for(vcl_size_t k = 0 ; k < exprs.size() ; ++k) stream << "__local " << scalartypes[k] << " buf" << k << "[" << local_size_1_ << "];" << std::endl; for(vcl_size_t k = 0 ; k < exprs.size() ; ++k) stream << "buf" << k << "[lid] = sum" << k << ";" << std::endl; //Reduce local memory for(vcl_size_t stride = local_size_1_/2 ; stride>1 ; stride /=2){ stream << "barrier(CLK_LOCAL_MEM_FENCE); " << std::endl; stream << "if(lid < " << stride << "){" << std::endl; stream.inc_tab(); for(vcl_size_t k = 0 ; k < exprs.size() ; ++k){ stream << "buf" << k << "[lid] += buf" << k << "[lid + " << stride << "];" << std::endl; } stream.dec_tab(); stream << "}" << std::endl; } //Last reduction and write back to temporary buffer stream << 
"barrier(CLK_LOCAL_MEM_FENCE); " << std::endl; stream << "if(lid==0){" << std::endl; stream.inc_tab(); for(vcl_size_t k = 0 ; k < exprs.size() ; ++k) stream << "buf" << k << "[0] += buf" << k << "[1];" << std::endl; for(vcl_size_t k = 0 ; k < exprs.size() ; ++k) stream << "temp"<< k << "[get_group_id(0)] = buf" << k << "[0];" << std::endl; stream.dec_tab(); stream << "}" << std::endl; } void core_1(utils::kernel_generation_stream& stream, std::vector exprs, std::vector scalartypes, statements_type const & statements, std::vector const & mapping) const { stream << "unsigned int lid = get_local_id(0);" << std::endl; for(vcl_size_t k = 0 ; k < exprs.size() ; ++k) stream << "__local " << scalartypes[k] << " buf" << k << "[" << local_size_1_ << "];" << std::endl; for(vcl_size_t k = 0 ; k < exprs.size() ; ++k) stream << scalartypes[0] << " sum" << k << " = 0;" << std::endl; stream << "for(unsigned int i = lid ; i < " << num_groups_ << " ; i += get_local_size(0)){" << std::endl; stream.inc_tab(); for(vcl_size_t k = 0 ; k < exprs.size() ; ++k) stream << "sum" << k << " += temp" << k << "[i];" << std::endl; stream.dec_tab(); stream << "}" << std::endl; for(vcl_size_t k = 0 ; k < exprs.size() ; ++k) stream << "buf" << k << "[lid] = sum" << k << ";" << std::endl; //Reduce local memory for(vcl_size_t stride = local_size_1_/2 ; stride>1 ; stride /=2){ stream << "barrier(CLK_LOCAL_MEM_FENCE); " << std::endl; stream << "if(lid < " << stride << "){" << std::endl; stream.inc_tab(); for(vcl_size_t k = 0 ; k < exprs.size() ; ++k){ stream << "buf" << k << "[lid] += buf" << k << "[lid + " << stride << "];" << std::endl; } stream.dec_tab(); stream << "}" << std::endl; } stream << "barrier(CLK_LOCAL_MEM_FENCE); " << std::endl; stream << "if(lid==0){" << std::endl; stream.inc_tab(); for(vcl_size_t k = 0 ; k < exprs.size() ; ++k){ stream << "buf" << k << "[0] += buf" << k << "[1];" << std::endl; exprs[k]->access_name("buf"+utils::to_string(k)+"[0]"); } vcl_size_t i = 0; 
for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ std::string str; detail::traverse(it->first, it->second, detail::expression_generation_traversal(std::make_pair("0", "0"), -1, str, mapping[i++]), false); stream << str << ";" << std::endl; } stream.dec_tab(); stream << "}" << std::endl; } void core(vcl_size_t kernel_id, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector const & mapping) const { std::vector exprs; for(std::vector::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it) for(detail::mapping_type::const_iterator iit = it->begin() ; iit != it->end() ; ++iit) if(detail::mapped_scalar_reduction * p = dynamic_cast(iit->second.get())) exprs.push_back(p); std::vector scalartypes; fill_scalartypes(statements, scalartypes); if(kernel_id==0){ core_0(stream,exprs,scalartypes,statements,mapping); } else{ core_1(stream,exprs,scalartypes,statements,mapping); } } private: unsigned int num_groups_; unsigned int decomposition_; mutable temporaries_type temporaries_; }; } } #endif ViennaCL-1.5.1-src/viennacl/generator/generate.hpp000644 001750 001750 00000054205 12267307531 022064 0ustar00rupprupp000000 000000 #ifndef VIENNACL_GENERATOR_GENERATE_HPP #define VIENNACL_GENERATOR_GENERATE_HPP /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/generator/generate.hpp @brief the user interface for the code generator */ #include #include #include #include "viennacl/scheduler/forwards.h" #include "viennacl/generator/forwards.h" #include "viennacl/generator/profiles.hpp" #include "viennacl/generator/statement_representation_functor.hpp" #include "viennacl/generator/set_arguments_functor.hpp" #include "viennacl/generator/map_functor.hpp" #include "viennacl/tools/tools.hpp" namespace viennacl{ namespace generator{ /** @brief Class for handling code generation * * It is meant to be only used along with the scheduler.*/ class code_generator{ public: /** @brief typedef of the key used in the forced profiles. Contains the expression type and the size of the scalartype */ typedef std::pair forced_profile_key_type; private: typedef std::pair representation_node_type; typedef std::vector statements_type; typedef std::map > forced_profiles_type; /** @brief Check for the data access flow of a node. * * Row-major + Trans and Col-Major + NoTrans are equal in this regard. This prevents too much code duplication in the kernel templates. 
*/ static bool is_flow_transposed(viennacl::scheduler::statement const & statement, viennacl::scheduler::statement_node const & root_node){ viennacl::scheduler::statement::container_type const & expr = statement.array(); if(root_node.op.type==viennacl::scheduler::OPERATION_UNARY_TRANS_TYPE) return root_node.lhs.subtype==viennacl::scheduler::DENSE_ROW_MATRIX_TYPE; else{ bool res = root_node.lhs.subtype==viennacl::scheduler::DENSE_COL_MATRIX_TYPE || root_node.rhs.subtype==viennacl::scheduler::DENSE_COL_MATRIX_TYPE; if(root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) res = res || is_lhs_flow_transposed(statement, expr[root_node.lhs.node_index]); if(root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) res = res || is_lhs_flow_transposed(statement, expr[root_node.rhs.node_index]); return res; } } /** @brief Checks for the data access flow of the LHS of a node */ static bool is_lhs_flow_transposed(viennacl::scheduler::statement const & statement, viennacl::scheduler::statement_node const & root_node){ scheduler::statement::container_type const & expr = statement.array(); if(root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) return is_flow_transposed(statement, expr[root_node.lhs.node_index]); else return root_node.lhs.subtype==viennacl::scheduler::DENSE_COL_MATRIX_TYPE; } /** @brief Checks for the data access flow of the RHS of a node */ static bool is_rhs_flow_transposed(viennacl::scheduler::statement const & statement, viennacl::scheduler::statement_node const & root_node){ viennacl::scheduler::statement::container_type const & expr = statement.array(); if(root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) return is_flow_transposed(statement, expr[root_node.rhs.node_index]); else return root_node.rhs.subtype==viennacl::scheduler::DENSE_COL_MATRIX_TYPE; } /** @brief Fills the expression descriptor for an operation of the type scalar = RHS */ static void 
fill_expression_descriptor_scalar(viennacl::scheduler::statement const & statement, viennacl::scheduler::statement_node const & root_node, expression_descriptor & descriptor){ viennacl::scheduler::statement::container_type const & expr = statement.array(); bool is_invalid = (root_node.op.type == viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE) || (descriptor.type_family==SCALAR_REDUCE_FAMILY && root_node.op.type == viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE); if(is_invalid){ descriptor.type_family = INVALID_EXPRESSION_FAMILY; descriptor.type = INVALID_EXPRESSION_TYPE; } else if(root_node.op.type==viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE){ descriptor.type_family = SCALAR_REDUCE_FAMILY; descriptor.type = SCALAR_REDUCE_TYPE; } if(descriptor.type_family!=INVALID_EXPRESSION_FAMILY && root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) fill_expression_descriptor_scalar(statement, expr[root_node.lhs.node_index],descriptor); if(descriptor.type_family!=INVALID_EXPRESSION_FAMILY && root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) fill_expression_descriptor_scalar(statement, expr[root_node.rhs.node_index],descriptor); } /** @brief Fills the expression descriptor for an operation of the type vector = RHS */ static void fill_expression_descriptor_vector(viennacl::scheduler::statement const & statement, viennacl::scheduler::statement_node const & root_node, expression_descriptor & descriptor){ viennacl::scheduler::statement::container_type const & expr = statement.array(); bool is_invalid = (root_node.op.type == viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE) || (root_node.op.type == viennacl::scheduler::OPERATION_BINARY_MAT_MAT_PROD_TYPE) || (descriptor.type_family==VECTOR_REDUCE_FAMILY && root_node.op.type == viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE); if(is_invalid){ descriptor.type_family=INVALID_EXPRESSION_FAMILY; descriptor.type=INVALID_EXPRESSION_TYPE; } else 
if(root_node.op.type==viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE){ descriptor.type_family=VECTOR_REDUCE_FAMILY; if(is_lhs_flow_transposed(statement,root_node)) descriptor.type=VECTOR_REDUCE_Tx_TYPE; else descriptor.type=VECTOR_REDUCE_Nx_TYPE; } if(descriptor.type_family!=INVALID_EXPRESSION_FAMILY && root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) fill_expression_descriptor_vector(statement, expr[root_node.lhs.node_index],descriptor); if(descriptor.type_family!=INVALID_EXPRESSION_FAMILY && root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) fill_expression_descriptor_vector(statement, expr[root_node.rhs.node_index],descriptor); } /** @brief Fills the expression descriptor for an operation of the type matrix = RHS */ static void fill_expression_descriptor_matrix(viennacl::scheduler::statement const & statement, viennacl::scheduler::statement_node const & root_node, expression_descriptor & descriptor){ viennacl::scheduler::statement::container_type const & expr = statement.array(); bool is_invalid = (root_node.op.type == viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE) || (root_node.op.type == viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE) || (descriptor.type_family==MATRIX_PRODUCT_FAMILY && root_node.op.type == viennacl::scheduler::OPERATION_BINARY_MAT_MAT_PROD_TYPE); if(is_invalid){ descriptor.type_family=INVALID_EXPRESSION_FAMILY; descriptor.type=INVALID_EXPRESSION_TYPE; } else if(root_node.op.type==viennacl::scheduler::OPERATION_BINARY_MAT_MAT_PROD_TYPE){ descriptor.type_family=MATRIX_PRODUCT_FAMILY; bool lhs_trans = is_lhs_flow_transposed(statement,root_node); bool rhs_trans = is_rhs_flow_transposed(statement,root_node); if(!lhs_trans && !rhs_trans) descriptor.type=MATRIX_PRODUCT_NN_TYPE; else if(lhs_trans && !rhs_trans) descriptor.type=MATRIX_PRODUCT_TN_TYPE; else if(!lhs_trans && rhs_trans) descriptor.type=MATRIX_PRODUCT_NT_TYPE; else if(lhs_trans && rhs_trans) 
descriptor.type=MATRIX_PRODUCT_TT_TYPE; } if(descriptor.type_family!=INVALID_EXPRESSION_FAMILY && root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) fill_expression_descriptor_matrix(statement, expr[root_node.lhs.node_index],descriptor); if(descriptor.type_family!=INVALID_EXPRESSION_FAMILY && root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) fill_expression_descriptor_matrix(statement, expr[root_node.rhs.node_index],descriptor); } /** @brief Fills the expression descriptor for a statement */ void fill_descriptor(viennacl::scheduler::statement const & statement, viennacl::scheduler::statement_node const & root_node, expression_descriptor & descriptor){ viennacl::scheduler::statement_node_type_family lhs_family = root_node.lhs.type_family; descriptor.scalartype_size = utils::call_on_element(root_node.lhs, utils::scalartype_size_fun()); if(lhs_family==viennacl::scheduler::VECTOR_TYPE_FAMILY){ descriptor.type_family = VECTOR_SAXPY_FAMILY; descriptor.type = VECTOR_SAXPY_TYPE; fill_expression_descriptor_vector(statement,root_node,descriptor); } else if(lhs_family==viennacl::scheduler::MATRIX_TYPE_FAMILY){ descriptor.type_family = MATRIX_SAXPY_FAMILY; descriptor.type = MATRIX_SAXPY_TYPE; fill_expression_descriptor_matrix(statement,root_node,descriptor); } else if(lhs_family==viennacl::scheduler::SCALAR_TYPE_FAMILY){ descriptor.type_family = SCALAR_SAXPY_FAMILY; descriptor.type = SCALAR_SAXPY_TYPE; fill_expression_descriptor_scalar(statement,root_node,descriptor); } } /** @brief Sets the kernel arguments and enqueue the kernels associated with a list of statements. 
* * The kernels are named 'kernel_'index of device in context'_'index of kernel in program' */ template void set_expression_arguments(profile_base const & profile, unsigned int device_offset, StatementsType const & statements, unsigned int & kernel_id, viennacl::ocl::program & p, std::list & kernels) const { for(vcl_size_t i = 0 ; i < profile.num_kernels() ; ++i){ //add kernel name char str[32]; std::sprintf(str,"kernel_%d_%d",device_offset,kernel_id); viennacl::ocl::kernel & kernel = p.get_kernel(str); kernels.push_back(&kernel); unsigned int current_arg = 0; //Configure ND Range and enqueue arguments profile.configure_range_enqueue_arguments(i, statements, kernel, current_arg); std::set memory; for(typename StatementsType::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ detail::traverse(it->first, it->second, detail::set_arguments_functor(memory,current_arg,kernel)); } ++kernel_id; } } /** @brief Gets the profile associated with a device and an expression descriptor */ profile_base const & get_profile(viennacl::ocl::device const & device, expression_descriptor const & descriptor) const { forced_profiles_type::const_iterator it = forced_profiles_.find(std::make_pair(descriptor.type, descriptor.scalartype_size)); if(it != forced_profiles_.end()) return *it->second; return *profiles::get(device,descriptor); } public: /** @brief The constructor */ code_generator(viennacl::ocl::context const & ctx = viennacl::ocl::current_context()) : ctx_(ctx){ statements_.reserve(16); } /** @brief Force the generator to use a specific profile for an operation */ template void force_profile(forced_profile_key_type key, T const & t){ forced_profiles_.insert(std::pair >(key, tools::shared_ptr(new T(t)))); } /** @brief Add a statement and the root node to the expression list * @return Whether or not the operation could be handled by the generator */ bool add(scheduler::statement const & statement, scheduler::statement_node const & root_node) { 
expression_descriptor descriptor; fill_descriptor(statement, root_node, descriptor); if(descriptor.type_family==INVALID_EXPRESSION_FAMILY) return false; if(statements_.empty()) statements_.push_back(std::make_pair(descriptor,profile_base::statements_type(1,std::make_pair(statement, root_node)))); else if(statements_.back().first == descriptor) statements_.back().second.push_back(std::make_pair(statement, root_node)); else statements_.push_back(std::make_pair(descriptor,profile_base::statements_type(1,std::make_pair(statement, root_node)))); return true; } /** @brief Set the arguments for a program previously generated by the generator and fills the kernels */ void configure_program(viennacl::ocl::program & p, std::list & kernels) const { unsigned int kernel_id = 0; std::vector::const_iterator found = std::find(ctx_.devices().begin(),ctx_.devices().end(),ctx_.current_device()); for(statements_type::const_iterator it = statements_.begin() ; it != statements_.end() ; ++it) set_expression_arguments(get_profile(ctx_.current_device(), it->first), static_cast(std::distance(ctx_.devices().begin(), found)), it->second, kernel_id, p, kernels); } /** @brief Creates an identifier string for the set of expressions in the object */ void make_program_name(char * program_name) const { unsigned int current_arg = 0; void* memory[64] = {NULL}; for(statements_type::const_iterator it = statements_.begin() ; it != statements_.end() ; ++it){ for(profile_base::statements_type::const_iterator iit = it->second.begin() ; iit != it->second.end() ; ++iit){ detail::traverse(iit->first, iit->second, detail::statement_representation_functor(memory, current_arg, program_name)); } } *program_name='\0'; } /** @brief Creates the OpenCL program string from the set of expressions in the object */ std::string make_opencl_program_string() const { utils::kernel_generation_stream stream; //Headers generation stream << "#if defined(cl_khr_fp64)\n"; stream << "# pragma OPENCL EXTENSION cl_khr_fp64: 
enable\n"; stream << "#elif defined(cl_amd_fp64)\n"; stream << "# pragma OPENCL EXTENSION cl_amd_fp64: enable\n"; stream << "#endif\n"; stream << std::endl; vcl_size_t device_offset =0; for(std::vector::const_iterator it = ctx_.devices().begin() ; it != ctx_.devices().end() ; ++it) for(statements_type::const_iterator iit = statements_.begin() ; iit != statements_.end() ; ++iit) get_profile(*it,iit->first)(stream,device_offset++,iit->second); return stream.str(); } /** @brief Creates the CUDA device code from the set of expressions in the object * * Performs just a direct translation... */ std::string make_cuda_program_string() const { //Creates OpenCL string with #ifdef and attributes utils::kernel_generation_stream stream; vcl_size_t device_offset =0; for(std::vector::const_iterator it = ctx_.devices().begin() ; it != ctx_.devices().end() ; ++it) for(statements_type::const_iterator iit = statements_.begin() ; iit != statements_.end() ; ++iit) get_profile(*it,iit->first)(stream,device_offset++,iit->second); std::string res = stream.str(); viennacl::tools::find_and_replace(res,"__attribute__","//__attribute__"); //Pointer viennacl::tools::find_and_replace(res, "__global float*", "float*"); viennacl::tools::find_and_replace(res, "__local float*", "float*"); viennacl::tools::find_and_replace(res, "__global double*", "double*"); viennacl::tools::find_and_replace(res, "__local double*", "double*"); //Qualifiers viennacl::tools::find_and_replace(res,"__global","__device__"); viennacl::tools::find_and_replace(res,"__kernel","__global__"); viennacl::tools::find_and_replace(res,"__constant","__constant__"); viennacl::tools::find_and_replace(res,"__local","__shared__"); //Indexing viennacl::tools::find_and_replace(res,"get_num_groups(0)","gridDim.x"); viennacl::tools::find_and_replace(res,"get_num_groups(1)","gridDim.y"); viennacl::tools::find_and_replace(res,"get_local_size(0)","blockDim.x"); viennacl::tools::find_and_replace(res,"get_local_size(1)","blockDim.y"); 
viennacl::tools::find_and_replace(res,"get_group_id(0)","blockIdx.x"); viennacl::tools::find_and_replace(res,"get_group_id(1)","blockIdx.y"); viennacl::tools::find_and_replace(res,"get_local_id(0)","threadIdx.x"); viennacl::tools::find_and_replace(res,"get_local_id(1)","threadIdx.y"); viennacl::tools::find_and_replace(res,"get_global_id(0)","(blockIdx.x*blockDim.x + threadIdx.x)"); viennacl::tools::find_and_replace(res,"get_global_id(1)","(blockIdx.y*blockDim.y + threadIdx.y)"); //Synchronization viennacl::tools::find_and_replace(res,"barrier(CLK_LOCAL_MEM_FENCE)","__syncthreads()"); viennacl::tools::find_and_replace(res,"barrier(CLK_GLOBAL_MEM_FENCE)","__syncthreads()"); return res; } private: statements_type statements_; viennacl::ocl::context const & ctx_; forced_profiles_type forced_profiles_; }; /** @brief Creates the program associated with a generator object and fills the kernels. Checks the context for the program and possibly (re)compile it. * * @param generator the generator to work on * @param kernels this list will be filled with the kernels associated with the generator * @param force_recompilation if true, the program will be recompiled */ inline viennacl::ocl::program & get_configured_program(viennacl::generator::code_generator const & generator, std::list & kernels, bool force_recompilation = false){ char* program_name = new char[256]; generator.make_program_name(program_name); if(force_recompilation) viennacl::ocl::current_context().delete_program(program_name); if(!viennacl::ocl::current_context().has_program(program_name)){ std::string source_code = generator.make_opencl_program_string(); #ifdef VIENNACL_DEBUG_BUILD std::cout << "Building " << program_name << "..." 
<< std::endl; std::cout << source_code << std::endl; #endif viennacl::ocl::current_context().add_program(source_code, program_name); } viennacl::ocl::program & p = viennacl::ocl::current_context().get_program(program_name); generator.configure_program(p, kernels); delete[] program_name; return p; } /** @brief Set the arguments and enqueue a generator object */ inline void enqueue(viennacl::generator::code_generator const & generator, bool force_recompilation = false){ std::list kernels; get_configured_program(generator, kernels, force_recompilation); for(std::list::iterator it = kernels.begin() ; it != kernels.end() ; ++it){ viennacl::ocl::enqueue(**it, (*it)->context().get_queue()); } } /** @brief Convenience function to get the OpenCL program string for a single statement */ inline std::string get_opencl_program_string(viennacl::scheduler::statement const & s){ generator::code_generator gen; gen.add(s,s.array()[0]); return gen.make_opencl_program_string(); } /** @brief Convenience function to get the CUDA device code for a single statement */ inline std::string get_cuda_device_code(viennacl::scheduler::statement const & s){ generator::code_generator gen; gen.add(s, s.array()[0]); return gen.make_cuda_program_string(); } /** @brief Generate and enqueue a statement plus root_node into the current queue */ inline void generate_enqueue_statement(viennacl::scheduler::statement const & s, scheduler::statement_node const & root_node){ generator::code_generator gen; gen.add(s,root_node); viennacl::generator::enqueue(gen); } /** @brief Generate and enqueue a statement into the current queue, assumes the root_node is the first node of the statement */ inline void generate_enqueue_statement(viennacl::scheduler::statement const & s){ generate_enqueue_statement(s, s.array()[0]); } } } #endif ViennaCL-1.5.1-src/viennacl/generator/utils.hpp000644 001750 001750 00000024171 12267307531 021431 0ustar00rupprupp000000 000000 #ifndef VIENNACL_GENERATOR_UTILS_HPP #define 
VIENNACL_GENERATOR_UTILS_HPP

/* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */

/** @file viennacl/generator/utils.hpp @brief Internal utils for a dynamic OpenCL kernel generation. */

#include
#include "viennacl/ocl/forwards.h"
#include "viennacl/traits/size.hpp"
#include "viennacl/scheduler/forwards.h"

namespace viennacl{

  namespace generator{

    namespace utils{

      /** @brief Calls fun on the host scalar stored in element
       *
       * Dispatches on element.numeric_type to host_float or host_double and returns the result of fun.
       * Any other numeric type throws the C string "not implemented".
       */
      template static typename Fun::result_type call_on_host_scalar(scheduler::lhs_rhs_element element, Fun const & fun){
        assert(element.type_family == scheduler::SCALAR_TYPE_FAMILY && bool("Must be called on a host scalar"));
        switch(element.numeric_type){
          case scheduler::FLOAT_TYPE : return fun(element.host_float);
          case scheduler::DOUBLE_TYPE : return fun(element.host_double);
          default : throw "not implemented";
        }
      }

      /** @brief Calls fun on the scalar object referenced by element
       *
       * Dispatches on element.numeric_type to *scalar_float or *scalar_double and returns the result of fun.
       * Any other numeric type throws the C string "not implemented".
       */
      template static typename Fun::result_type call_on_scalar(scheduler::lhs_rhs_element element, Fun const & fun){
        assert(element.type_family == scheduler::SCALAR_TYPE_FAMILY && bool("Must be called on a scalar"));
        switch(element.numeric_type){
          case scheduler::FLOAT_TYPE : return fun(*element.scalar_float);
          case scheduler::DOUBLE_TYPE : return fun(*element.scalar_double);
          default : throw "not implemented";
        }
      }

      /** @brief Calls fun on the vector object referenced by element
       *
       * Dispatches on element.numeric_type to *vector_float or *vector_double and returns the result of fun.
       * Any other numeric type throws the C string "not implemented".
       */
      template static typename Fun::result_type call_on_vector(scheduler::lhs_rhs_element element, Fun const & fun){
        assert(element.type_family == scheduler::VECTOR_TYPE_FAMILY && bool("Must be called on a vector"));
        switch(element.numeric_type){
          case scheduler::FLOAT_TYPE : return fun(*element.vector_float);
          case scheduler::DOUBLE_TYPE : return fun(*element.vector_double);
          default : throw "not implemented";
        }
      }

      /** @brief Calls fun on the implicit vector object referenced by element
       *
       * Dispatches on element.numeric_type to *implicit_vector_float or *implicit_vector_double.
       * Any other numeric type throws the C string "not implemented".
       */
      template static typename Fun::result_type call_on_implicit_vector(scheduler::lhs_rhs_element element, Fun const & fun){
        assert(element.type_family == scheduler::VECTOR_TYPE_FAMILY && bool("Must be called on a implicit_vector"));
        assert(element.subtype == scheduler::IMPLICIT_VECTOR_TYPE && bool("Must be called on a implicit_vector"));
        switch(element.numeric_type){
          case scheduler::FLOAT_TYPE : return fun(*element.implicit_vector_float);
          case scheduler::DOUBLE_TYPE : return fun(*element.implicit_vector_double);
          default : throw "not implemented";
        }
      }

      /** @brief Calls fun on the dense matrix object referenced by element
       *
       * The row-major members are used when element.subtype is DENSE_ROW_MATRIX_TYPE; every other subtype
       * falls through to the column-major members. Within each layout the call dispatches on
       * element.numeric_type; any type other than float/double throws the C string "not implemented".
       */
      template static typename Fun::result_type call_on_matrix(scheduler::lhs_rhs_element element, Fun const & fun){
        assert(element.type_family == scheduler::MATRIX_TYPE_FAMILY && bool("Must be called on a matrix"));
        if (element.subtype == scheduler::DENSE_ROW_MATRIX_TYPE)
        {
          switch(element.numeric_type){
            case scheduler::FLOAT_TYPE : return fun(*element.matrix_row_float);
            case scheduler::DOUBLE_TYPE : return fun(*element.matrix_row_double);
            default : throw "not implemented";
          }
        }
        else
        {
          switch(element.numeric_type){
            case scheduler::FLOAT_TYPE : return fun(*element.matrix_col_float);
            case scheduler::DOUBLE_TYPE : return fun(*element.matrix_col_double);
            default : throw "not implemented";
          }
        }
      }

      /** @brief Calls fun on the implicit matrix object referenced by element
       *
       * Dispatches on element.numeric_type to *implicit_matrix_float or *implicit_matrix_double.
       * Any other numeric type throws the C string "not implemented".
       */
      template static typename Fun::result_type call_on_implicit_matrix(scheduler::lhs_rhs_element element, Fun const & fun){
        assert(element.type_family == scheduler::MATRIX_TYPE_FAMILY && bool("Must be called on a matrix_vector"));
        assert(element.subtype == scheduler::IMPLICIT_MATRIX_TYPE && bool("Must be called on a matrix_vector"));
        switch(element.numeric_type){
          case scheduler::FLOAT_TYPE : return fun(*element.implicit_matrix_float);
          case scheduler::DOUBLE_TYPE : return fun(*element.implicit_matrix_double);
          default : throw "not implemented";
        }
      }

      /** @brief Calls fun on whatever object element refers to
       *
       * Selects the matching call_on_*() helper from element.type_family and element.subtype
       * (host scalar vs. scalar, implicit vs. regular vector, implicit vs. dense matrix).
       * Type families other than scalar, vector and matrix throw the C string "not implemented".
       */
      template static typename Fun::result_type call_on_element(scheduler::lhs_rhs_element const & element, Fun const & fun){
        switch(element.type_family){
          case scheduler::SCALAR_TYPE_FAMILY:
            if (element.subtype == scheduler::HOST_SCALAR_TYPE)
              return call_on_host_scalar(element, fun);
            else
              return call_on_scalar(element, fun);
          case scheduler::VECTOR_TYPE_FAMILY :
            if (element.subtype == scheduler::IMPLICIT_VECTOR_TYPE)
              return call_on_implicit_vector(element, fun);
            else
              return call_on_vector(element, fun);
          case scheduler::MATRIX_TYPE_FAMILY:
            if (element.subtype == scheduler::IMPLICIT_MATRIX_TYPE)
              return call_on_implicit_matrix(element, fun);
            else
              return call_on_matrix(element,fun);
          default:
            throw "not implemented";
        }
      }

      /** @brief Functor for returning the size of the underlying scalar type in bytes. */
      struct scalartype_size_fun{
        typedef vcl_size_t result_type;
        // host scalars are passed as plain float/double values
        result_type operator()(float const &) const { return sizeof(float); }
        result_type operator()(double const &) const { return sizeof(double); }
        // any other argument type: size of the type reported by viennacl::result_of::cpu_value_type
        template result_type operator()(T const &) const { return sizeof(typename viennacl::result_of::cpu_value_type::type); }
      };

      /** @brief Functor for returning the internal size of a vector. */
      struct internal_size_fun{
        typedef vcl_size_t result_type;
        template result_type operator()(T const &t) const { return viennacl::traits::internal_size(t); }
      };

      /** @brief Functor for obtaining the OpenCL handle from ViennaCL objects (vector, matrix, etc.). */
      struct handle_fun{
        typedef cl_mem result_type;
        template result_type operator()(T const &t) const { return t.handle().opencl_handle(); }
      };

      /** @brief Functor for obtaining the internal number of rows of a ViennaCL matrix. */
      struct internal_size1_fun{
        typedef vcl_size_t result_type;
        template result_type operator()(T const &t) const { return viennacl::traits::internal_size1(t); }
      };

      /** @brief Functor for obtaining the internal number of columns of a ViennaCL matrix. 
*/
      struct internal_size2_fun{
        typedef vcl_size_t result_type;
        template result_type operator()(T const &t) const { return viennacl::traits::internal_size2(t); }
      };

      /** @brief Helper metafunction for checking whether two types are the same. */
      template struct is_same_type { enum { value = 0 }; };

      /** \cond */
      template struct is_same_type { enum { value = 1 }; };
      /** \endcond */

      /** @brief Converts a value to its string representation by streaming it into a std::stringstream. */
      template inline std::string to_string ( T const t )
      {
        std::stringstream ss;
        ss << t;
        return ss.str();
      }

      /** @brief Helper struct for converting a numerical type to its string representation. */
      template struct type_to_string;

      /** \cond */
      template<> struct type_to_string { static const char * value() { return "float"; } };
      template<> struct type_to_string { static const char * value() { return "double"; } };
      /** \endcond */

      /** @brief Helper struct for obtaining the first letter of a type. Used internally by the generator only. */
      template struct first_letter_of_type;

      /** \cond */
      // the four specializations return 'f', 'd', 'r' and 'c' respectively
      template<> struct first_letter_of_type { static char value() { return 'f'; } };
      template<> struct first_letter_of_type { static char value() { return 'd'; } };
      template<> struct first_letter_of_type { static char value() { return 'r'; } };
      template<> struct first_letter_of_type { static char value() { return 'c'; } };
      /** \endcond */

      /** @brief A stream class where the kernel sources are streamed to. Takes care of indentation of the sources. 
*/ class kernel_generation_stream : public std::ostream{ private: class kgenstream : public std::stringbuf{ public: kgenstream(std::ostringstream& oss,unsigned int const & tab_count) : oss_(oss), tab_count_(tab_count){ } int sync() { for(unsigned int i=0 ; i #include "viennacl/matrix.hpp" #include "viennacl/vector.hpp" #include "viennacl/forwards.h" #include "viennacl/scheduler/forwards.h" #include "viennacl/generator/forwards.h" #include "viennacl/meta/result_of.hpp" #include "viennacl/tools/shared_ptr.hpp" #include "viennacl/ocl/kernel.hpp" #include "viennacl/generator/helpers.hpp" #include "viennacl/generator/utils.hpp" #include "viennacl/generator/mapped_objects.hpp"

namespace viennacl{

  namespace generator{

    namespace detail{

      /** @brief Helper class for setting the arguments of a kernel.
       *
       * The functor is applied to every leaf operand of a statement. Each overload binds the operand to the
       * next free argument slot(s) of the kernel and advances the shared argument counter. Objects that
       * carry a memory handle are recorded by address so that the same object is bound only once.
       */
      class set_arguments_functor : public traversal_functor{
        public:
          typedef void result_type;

          /** @brief Stores references to the set of already bound objects, the running argument index and the kernel */
          set_arguments_functor(std::set & memory, unsigned int & current_arg, viennacl::ocl::kernel & kernel) : memory_(memory), current_arg_(current_arg), kernel_(kernel){ }

          /** @brief Host scalar mapping: the value is converted to its OpenCL type and always passed */
          template result_type operator()(ScalarType const & scal) const {
            typedef typename viennacl::result_of::cl_type::type cl_scalartype;
            kernel_.arg(current_arg_++, cl_scalartype(scal));
          }

          /** @brief Scalar mapping */
          template result_type operator()(scalar const & scal) const {
            // insert().second is true only the first time this object is seen
            if(memory_.insert((void*)&scal).second)
              kernel_.arg(current_arg_++, scal.handle().opencl_handle());
          }

          /** @brief Vector mapping */
          template result_type operator()(vector_base const & vec) const {
            if(memory_.insert((void*)&vec).second){
              kernel_.arg(current_arg_++, vec.handle().opencl_handle());
              // start and stride are only passed when they differ from the defaults (0 and 1)
              if(viennacl::traits::start(vec)>0)
                kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start(vec)));
              if(vec.stride()>1)
                kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride(vec)));
            }
          }

          /** @brief Implicit vector mapping */
          template result_type operator()(implicit_vector_base const & vec) const {
            typedef typename viennacl::result_of::cl_type::type cl_scalartype;
            if(memory_.insert((void*)&vec).second){
              // the value is passed unless it is static; the index is passed when the vector has one
              if(vec.is_value_static()==false)
                kernel_.arg(current_arg_++, cl_scalartype(vec.value()));
              if(vec.has_index())
                kernel_.arg(current_arg_++, cl_uint(vec.index()));
            }
          }

          /** @brief Matrix mapping */
          template result_type operator()(matrix_base const & mat) const {
            //typedef typename matrix_base::size_type size_type;
            if(memory_.insert((void*)&mat).second){
              kernel_.arg(current_arg_++, mat.handle().opencl_handle());
              // start/stride of each dimension are only passed when they differ from the defaults (0 and 1)
              if(viennacl::traits::start1(mat)>0)
                kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start1(mat)));
              if(viennacl::traits::stride1(mat)>1)
                kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride1(mat)));
              if(viennacl::traits::start2(mat)>0)
                kernel_.arg(current_arg_++, cl_uint(viennacl::traits::start2(mat)));
              if(viennacl::traits::stride2(mat)>1)
                kernel_.arg(current_arg_++, cl_uint(viennacl::traits::stride2(mat)));
            }
          }

          /** @brief Implicit matrix mapping */
          template result_type operator()(implicit_matrix_base const & mat) const {
            // NOTE(review): unlike the implicit vector overload, this one neither records the object in memory_ nor converts the value to a cl type - confirm this is intended
            if(mat.is_value_static()==false)
              kernel_.arg(current_arg_++, mat.value());
          }

          /** @brief Traversal functor: binds the LHS or RHS operand of a node unless that operand is itself a composite expression */
          void operator()(scheduler::statement const * /*statement*/, scheduler::statement_node const * root_node, detail::node_type node_type) const {
            if(node_type==LHS_NODE_TYPE && root_node->lhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY)
              utils::call_on_element(root_node->lhs, *this);
            else if(node_type==RHS_NODE_TYPE && root_node->rhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY)
              utils::call_on_element(root_node->rhs, *this);
          }

        private:
          std::set & memory_;              // addresses of the objects already bound to the kernel
          unsigned int & current_arg_;     // index of the next kernel argument, shared with the caller
          viennacl::ocl::kernel & kernel_; // the kernel whose arguments are set
      };

    }

  }

}
#endif
ViennaCL-1.5.1-src/viennacl/generator/saxpy.hpp000644 001750 001750 00000022254 12267307531 021435 0ustar00rupprupp000000 000000
#ifndef VIENNACL_GENERATOR_GENERATE_SAXPY_HPP
#define VIENNACL_GENERATOR_GENERATE_SAXPY_HPP

/* ========================================================================= Copyright (c) 2010-2014, Institute 
for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/generator/saxpy.hpp * * @brief Kernel template for the saxpy-like operation */ #include #include "viennacl/scheduler/forwards.h" #include "viennacl/generator/mapped_objects.hpp" #include "viennacl/generator/helpers.hpp" #include "viennacl/generator/utils.hpp" #include "viennacl/generator/profile_base.hpp" #include "viennacl/tools/tools.hpp" namespace viennacl{ namespace generator{ /** @brief OpenCL kernel generation class for vector expressions of AXPY type, i.e. x = alpha * y + beta * z, where the number of summands can in principle be arbitrarily large. 
*/ class vector_saxpy : public profile_base{ public: static std::string csv_format() { return "Vec,LSize1,NumGroups1,GlobalDecomposition"; } std::string csv_representation() const{ std::ostringstream oss; oss << vector_size_ << "," << local_size_1_ << "," << num_groups_ << "," << decomposition_; return oss.str(); } vector_saxpy(unsigned int v, vcl_size_t gs, vcl_size_t ng, unsigned int d) : profile_base(v, gs, 1, 1), num_groups_(ng), decomposition_(d){ } void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type const & statements, viennacl::ocl::kernel & k, unsigned int & n_arg) const{ configure_local_sizes(k, kernel_id); k.global_work_size(0,local_size_1_*num_groups_); k.global_work_size(1,1); scheduler::statement_node const & first_node = statements.front().second; viennacl::vcl_size_t N = utils::call_on_vector(first_node.lhs, utils::internal_size_fun()); k.arg(n_arg++, cl_uint(N/vector_size_)); } void kernel_arguments(statements_type const & /*statements*/, std::string & arguments_string) const{ arguments_string += detail::generate_value_kernel_argument("unsigned int", "N"); } private: void core(vcl_size_t /*kernel_id*/, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector const & mapping) const { stream << "for(unsigned int i = get_global_id(0) ; i < N ; i += get_global_size(0))" << std::endl; stream << "{" << std::endl; stream.inc_tab(); //Fetches entries to registers std::set fetched; for(std::vector::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it) for(detail::mapping_type::const_reverse_iterator iit = it->rbegin() ; iit != it->rend() ; ++iit) //Useless to fetch cpu scalars into registers if(detail::mapped_handle * p = dynamic_cast(iit->second.get())) p->fetch( std::make_pair("i","0"), vector_size_, fetched, stream); //Generates all the expression, in order vcl_size_t i = 0; for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ std::string str; 
detail::traverse(it->first, it->second, detail::expression_generation_traversal(std::make_pair("i","0"), -1, str, mapping[i++])); stream << str << ";" << std::endl; } //Writes back for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it) //Gets the mapped object at the LHS of each expression if(detail::mapped_handle * p = dynamic_cast(at(mapping.at(std::distance(statements.begin(),it)), std::make_pair(&it->second, detail::LHS_NODE_TYPE)).get())) p->write_back( std::make_pair("i", "0"), fetched, stream); stream.dec_tab(); stream << "}" << std::endl; } private: vcl_size_t num_groups_; unsigned int decomposition_; }; /** @brief OpenCL kernel generation class for matrix expressions of AXPY type, i.e. A = alpha * B + beta * C, where the number of summands can in principle be arbitrarily large. */ class matrix_saxpy : public profile_base{ bool invalid_impl(viennacl::ocl::device const & /*dev*/, vcl_size_t /*scalartype_size*/) const{ return false; } bool is_slow_impl(viennacl::ocl::device const &) const { return false; } public: matrix_saxpy(unsigned int v, vcl_size_t gs1, vcl_size_t gs2, vcl_size_t ng1, vcl_size_t ng2, unsigned int d) : profile_base(v, gs1, gs2, 1), num_groups_row_(ng1), num_groups_col_(ng2), decomposition_(d){ } static std::string csv_format() { return "Vec,LSize1,LSize2,NumGroups1,NumGroups2,GlobalDecomposition"; } std::string csv_representation() const{ std::ostringstream oss; oss << vector_size_ << "," << local_size_1_ << "," << local_size_2_ << "," << num_groups_row_ << "," << num_groups_col_ << "," << decomposition_; return oss.str(); } void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type const & statements, viennacl::ocl::kernel & k, unsigned int & n_arg) const{ configure_local_sizes(k, kernel_id); k.global_work_size(0,local_size_1_*num_groups_row_); k.global_work_size(1,local_size_2_*num_groups_col_); scheduler::statement_node const & first_node = statements.front().second; 
k.arg(n_arg++, cl_uint(utils::call_on_matrix(first_node.lhs, utils::internal_size1_fun()))); k.arg(n_arg++, cl_uint(utils::call_on_matrix(first_node.lhs, utils::internal_size2_fun()))); } void kernel_arguments(statements_type const & /*statements*/, std::string & arguments_string) const{ arguments_string += detail::generate_value_kernel_argument("unsigned int", "M"); arguments_string += detail::generate_value_kernel_argument("unsigned int", "N"); } private: void core(vcl_size_t /*kernel_id*/, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector const & mapping) const { for(std::vector::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it){ for(detail::mapping_type::const_iterator iit = it->begin() ; iit != it->end() ; ++iit){ if(detail::mapped_matrix * p = dynamic_cast(iit->second.get())) p->bind_sizes("M","N"); } } stream << "for(unsigned int i = get_global_id(0) ; i < M ; i += get_global_size(0))" << std::endl; stream << "{" << std::endl; stream.inc_tab(); stream << "for(unsigned int j = get_global_id(1) ; j < N ; j += get_global_size(1))" << std::endl; stream << "{" << std::endl; stream.inc_tab(); //Fetches entries to registers std::set fetched; for(std::vector::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it) for(detail::mapping_type::const_reverse_iterator it2 = it->rbegin() ; it2 != it->rend() ; ++it2) if(detail::mapped_matrix * p = dynamic_cast(it2->second.get())) p->fetch(std::make_pair("i", "j"), vector_size_, fetched, stream); vcl_size_t i = 0; for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ std::string str; detail::traverse(it->first, it->second, detail::expression_generation_traversal(std::make_pair("i", "j"), -1, str, mapping[i++])); stream << str << ";" << std::endl; } //Writes back for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ if(detail::mapped_handle * p = 
dynamic_cast(at(mapping.at(std::distance(statements.begin(),it)), std::make_pair(&it->second,detail::LHS_NODE_TYPE)).get())) p->write_back(std::make_pair("i", "j"), fetched, stream); } stream.dec_tab(); stream << "}" << std::endl; stream.dec_tab(); stream << "}" << std::endl; } private: vcl_size_t num_groups_row_; vcl_size_t num_groups_col_; unsigned int decomposition_; }; } } #endif ViennaCL-1.5.1-src/viennacl/circulant_matrix.hpp000644 001750 001750 00000033472 12267307531 021657 0ustar00rupprupp000000 000000 #ifndef VIENNACL_CIRCULANT_MATRIX_HPP #define VIENNACL_CIRCULANT_MATRIX_HPP /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file circulant_matrix.hpp @brief Implementation of the circulant_matrix class for efficient manipulation of circulant matrices. Experimental. */ #include "viennacl/forwards.h" #include "viennacl/vector.hpp" #include "viennacl/ocl/backend.hpp" #include "viennacl/linalg/circulant_matrix_operations.hpp" #include "viennacl/fft.hpp" namespace viennacl { /** @brief A Circulant matrix class * * @tparam SCALARTYPE The underlying scalar type (either float or double) * @tparam ALIGNMENT The internal memory size is given by (size()/ALIGNMENT + 1) * ALIGNMENT. ALIGNMENT must be a power of two. Best values or usually 4, 8 or 16, higher values are usually a waste of memory. 
*/ template class circulant_matrix { public: typedef viennacl::backend::mem_handle handle_type; typedef scalar::ResultType> value_type; /** * @brief The default constructor. Does not allocate any memory. * */ explicit circulant_matrix() {} /** * @brief Creates the matrix with the given size * * @param rows Number of rows of the matrix * @param cols Number of columns of the matrix */ explicit circulant_matrix(vcl_size_t rows, vcl_size_t cols) : elements_(rows) { assert(rows == cols && bool("Circulant matrix must be square!")); (void)cols; // avoid 'unused parameter' warning in optimized builds } /** @brief Resizes the matrix. * Existing entries can be preserved * * @param sz New size of matrix * @param preserve If true, existing values are preserved. */ void resize(vcl_size_t sz, bool preserve = true) { elements_.resize(sz, preserve); } /** @brief Returns the OpenCL handle * * @return OpenCL handle */ handle_type const & handle() const { return elements_.handle(); } /** * @brief Returns an internal viennacl::vector, which represents a circulant matrix elements * */ viennacl::vector & elements() { return elements_; } viennacl::vector const & elements() const { return elements_; } /** * @brief Returns the number of rows of the matrix */ vcl_size_t size1() const { return elements_.size(); } /** * @brief Returns the number of columns of the matrix */ vcl_size_t size2() const { return elements_.size(); } /** @brief Returns the internal size of matrix representtion. 
* Usually required for launching OpenCL kernels only * * @return Internal size of matrix representation */ vcl_size_t internal_size() const { return elements_.internal_size(); } /** * @brief Read-write access to a single element of the matrix * * @param row_index Row index of accessed element * @param col_index Column index of accessed element * @return Proxy for matrix entry */ entry_proxy operator()(vcl_size_t row_index, vcl_size_t col_index) { long index = static_cast(row_index) - static_cast(col_index); assert(row_index < size1() && col_index < size2() && bool("Invalid access")); while (index < 0) index += static_cast(size1()); return elements_[index]; } /** * @brief += operation for circulant matrices * * @param that Matrix which will be added * @return Result of addition */ circulant_matrix& operator +=(circulant_matrix& that) { elements_ += that.elements(); return *this; } private: circulant_matrix(circulant_matrix const &) {} circulant_matrix & operator=(circulant_matrix const & t); viennacl::vector elements_; }; /** @brief Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) * * * @param cpu_vec A std::vector on the host. * @param gpu_mat A circulant_matrix from ViennaCL */ template void copy(std::vector& cpu_vec, circulant_matrix& gpu_mat) { assert( (gpu_mat.size1() == 0 || cpu_vec.size() == gpu_mat.size1()) && bool("Size mismatch")); copy(cpu_vec, gpu_mat.elements()); } /** @brief Copies a circulant matrix from the OpenCL device (either GPU or multi-core CPU) to the std::vector * * * @param gpu_mat A circulant_matrix from ViennaCL * @param cpu_vec A std::vector on the host. 
*/ template void copy(circulant_matrix& gpu_mat, std::vector& cpu_vec) { assert(cpu_vec.size() == gpu_mat.size1() && bool("Size mismatch")); copy(gpu_mat.elements(), cpu_vec); } /** @brief Copies a circulant matrix from the OpenCL device (either GPU or multi-core CPU) to the matrix-like object * * * @param circ_src A circulant_matrix from ViennaCL * @param com_dst A matrix-like object */ template void copy(circulant_matrix& circ_src, MATRIXTYPE& com_dst) { vcl_size_t size = circ_src.size1(); assert(size == viennacl::traits::size1(com_dst) && bool("Size mismatch")); assert(size == viennacl::traits::size2(com_dst) && bool("Size mismatch")); std::vector tmp(size); copy(circ_src, tmp); for (vcl_size_t i = 0; i < size; i++) { for (vcl_size_t j = 0; j < size; j++) { long index = static_cast(i) - static_cast(j); if (index < 0) index = static_cast(size + index); com_dst(i, j) = tmp[index]; } } } /** @brief Copies a the matrix-like object to the circulant matrix from the OpenCL device (either GPU or multi-core CPU) * * * @param com_src A std::vector on the host * @param circ_dst A circulant_matrix from ViennaCL */ template void copy(MATRIXTYPE& com_src, circulant_matrix& circ_dst) { assert( (circ_dst.size1() == 0 || circ_dst.size1() == viennacl::traits::size1(com_src)) && bool("Size mismatch")); assert( (circ_dst.size2() == 0 || circ_dst.size2() == viennacl::traits::size2(com_src)) && bool("Size mismatch")); vcl_size_t size = viennacl::traits::size1(com_src); std::vector tmp(size); for(vcl_size_t i = 0; i < size; i++) tmp[i] = com_src(i, 0); copy(tmp, circ_dst); } /*namespace linalg { template void prod_impl(circulant_matrix const & mat, vector const & vec, vector& result) { viennacl::vector circ(mat.elements().size() * 2); fft::real_to_complex(mat.elements(), circ, mat.elements().size()); viennacl::vector tmp(vec.size() * 2); viennacl::vector tmp2(vec.size() * 2); fft::real_to_complex(vec, tmp, vec.size()); fft::convolve(circ, tmp, tmp2); fft::complex_to_real(tmp2, result, 
vec.size()); } }*/ /** @brief Prints the matrix. Output is compatible to boost::numeric::ublas * * @param s STL output stream * @param gpu_matrix A ViennaCL circulant matrix */ template std::ostream & operator<<(std::ostream& s, circulant_matrix& gpu_matrix) { vcl_size_t size = gpu_matrix.size1(); std::vector tmp(size); copy(gpu_matrix, tmp); s << "[" << size << "," << size << "]("; for(vcl_size_t i = 0; i < size; i++) { s << "("; for(vcl_size_t j = 0; j < size; j++) { long index = static_cast(i) - static_cast(j); if(index < 0) index = static_cast(size) + index; s << tmp[index]; //s << index; if(j < (size - 1)) s << ","; } s << ")"; } s << ")"; return s; } // // Specify available operations: // /** \cond */ namespace linalg { namespace detail { // x = A * y template struct op_executor, op_assign, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { // check for the special case x = A * x if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs = temp; } else viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs); } }; template struct op_executor, op_inplace_add, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs += temp; } }; template struct op_executor, op_inplace_sub, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs -= temp; } }; // x = A * vec_op template struct op_executor, op_assign, vector_expression, const vector_expression, op_prod> > { static void apply(vector_base & lhs, 
vector_expression, const vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs()); viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs); } }; // x = A * vec_op template struct op_executor, op_inplace_add, vector_expression, vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs()); viennacl::vector temp_result(lhs); viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); lhs += temp_result; } }; // x = A * vec_op template struct op_executor, op_inplace_sub, vector_expression, const vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs()); viennacl::vector temp_result(lhs); viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); lhs -= temp_result; } }; } // namespace detail } // namespace linalg /** \endcond */ } #endif // VIENNACL_CIRCULANT_MATRIX_HPP ViennaCL-1.5.1-src/viennacl/slice.hpp000644 001750 001750 00000005043 12267307531 017377 0ustar00rupprupp000000 000000 #ifndef VIENNACL_SLICE_HPP_ #define VIENNACL_SLICE_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file slice.hpp @brief Implementation of a slice object for use with proxy objects */ #include #include #include #include "viennacl/forwards.h" namespace viennacl { /** @brief A slice class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded. * * Similar to the boost::numeric::ublas::basic_range class. */ template class basic_slice { public: typedef SizeType size_type; typedef DistanceType difference_type; typedef size_type value_type; typedef value_type const_reference; typedef const_reference reference; basic_slice() : start_(0), stride_(1), size_(0) {} basic_slice(size_type start_index, difference_type stride_arg, size_type size_arg) : start_(start_index), stride_(stride_arg), size_(size_arg) {} size_type start() const { return start_; } difference_type stride() const { return stride_; } size_type size() const { return size_; } const_reference operator()(size_type i) const { assert(i < size()); return start_ + i * stride_; } const_reference operator[](size_type i) const { return operator()(i); } bool operator==(const basic_slice & s) const { return (start_ == s.start_) && (stride_ == s.stride_) && (size_ == s.size_); } bool operator!=(const basic_slice & s) const { return !(*this == s); } private: size_type start_; difference_type stride_; size_type size_; }; } #endif ViennaCL-1.5.1-src/viennacl/context.hpp000644 001750 001750 00000005550 12267307531 017767 0ustar00rupprupp000000 000000 #ifndef VIENNACL_CONTEXT_HPP_ #define VIENNACL_CONTEXT_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for 
Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/context.hpp @brief Implementation of a OpenCL-like context, which serves as a unification of {OpenMP, CUDA, OpenCL} at the user API. */ #include #include #include #include "viennacl/forwards.h" #include "viennacl/ocl/forwards.h" #include "viennacl/backend/mem_handle.hpp" namespace viennacl { /** @brief Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also suitable for CUDA and OpenMP * * Context objects are used to distinguish between different memory domains. One context may refer to an OpenCL device, another context may refer to a CUDA device, and a third context to main RAM. * Thus, operations are only defined on objects residing on the same context. 
*/ class context { public: context() : mem_type_(viennacl::backend::default_memory_type()) { #ifdef VIENNACL_WITH_OPENCL if (mem_type_ == OPENCL_MEMORY) ocl_context_ptr_ = &viennacl::ocl::current_context(); else ocl_context_ptr_ = NULL; #endif } explicit context(viennacl::memory_types mtype) : mem_type_(mtype) { if (mem_type_ == MEMORY_NOT_INITIALIZED) mem_type_ = viennacl::backend::default_memory_type(); #ifdef VIENNACL_WITH_OPENCL if (mem_type_ == OPENCL_MEMORY) ocl_context_ptr_ = &viennacl::ocl::current_context(); else ocl_context_ptr_ = NULL; #endif } #ifdef VIENNACL_WITH_OPENCL context(viennacl::ocl::context const & ctx) : mem_type_(OPENCL_MEMORY), ocl_context_ptr_(&ctx) {} viennacl::ocl::context const & opencl_context() const { assert(mem_type_ == OPENCL_MEMORY && bool("Context type is not OpenCL")); return *ocl_context_ptr_; } #endif // TODO: Add CUDA and OpenMP contexts viennacl::memory_types memory_type() const { return mem_type_; } private: viennacl::memory_types mem_type_; #ifdef VIENNACL_WITH_OPENCL viennacl::ocl::context const * ocl_context_ptr_; #endif }; } #endif ViennaCL-1.5.1-src/viennacl/matrix.hpp000644 001750 001750 00000440432 12267307531 017611 0ustar00rupprupp000000 000000 #ifndef VIENNACL_MATRIX_HPP_ #define VIENNACL_MATRIX_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/matrix.hpp @brief Implementation of the dense matrix class */ #include "viennacl/forwards.h" #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/linalg/matrix_operations.hpp" #include "viennacl/linalg/sparse_matrix_operations.hpp" #include "viennacl/tools/tools.hpp" #include "viennacl/tools/matrix_size_deducer.hpp" #include "viennacl/meta/result_of.hpp" #include "viennacl/meta/enable_if.hpp" //#include "viennacl/rand/utils.hpp" #include "viennacl/traits/handle.hpp" namespace viennacl { /** @brief Base class for representing matrices where the individual entries are not all stored explicitly, e.g. identity_matrix<> * * Examples are identity_matrix, scalar_matrix, and zero_matrix. */ template class implicit_matrix_base { protected: typedef vcl_size_t size_type; implicit_matrix_base(size_type size1, size_type size2, std::pair value, bool diag) : size1_(size1), size2_(size2), value_(value), diag_(diag){ } public: typedef SCALARTYPE const & const_reference; typedef SCALARTYPE cpu_value_type; size_type size1() const { return size1_; } size_type size2() const { return size2_; } SCALARTYPE value() const { return value_.first; } bool is_value_static( ) const { return value_.second; } bool diag() const { return diag_; } const_reference operator()(size_type i, size_type j) const { if(diag_) return (i == j) ? value_.first : 0; return value_.first; } protected: size_type size1_; size_type size2_; std::pair value_; bool diag_; }; // // Initializer types // /** @brief Represents a vector consisting of 1 at a given index and zeros otherwise. 
To be used as an initializer for viennacl::vector, vector_range, or vector_slize only. */ template class identity_matrix { public: typedef vcl_size_t size_type; typedef SCALARTYPE const & const_reference; identity_matrix(size_type s, viennacl::context ctx = viennacl::context()) : size_(s), diag_(1), off_diag_(0), ctx_(ctx) {} size_type size1() const { return size_; } size_type size2() const { return size_; } const_reference operator()(size_type i, size_type j) const { return (i == j) ? diag_ : off_diag_; } viennacl::context context() const { return ctx_; } private: size_type size_; SCALARTYPE diag_; SCALARTYPE off_diag_; viennacl::context ctx_; }; /** @brief Represents a vector consisting of zeros only. To be used as an initializer for viennacl::vector, vector_range, or vector_slize only. */ template class zero_matrix { public: typedef vcl_size_t size_type; typedef SCALARTYPE const & const_reference; zero_matrix(size_type s1, size_type s2, viennacl::context ctx = viennacl::context()) : size1_(s1), size2_(s2), val_(0), ctx_(ctx) {} size_type size1() const { return size1_; } size_type size2() const { return size2_; } const_reference operator()(size_type /*i*/, size_type /*j*/) const { return val_; } viennacl::context context() const { return ctx_; } private: size_type size1_; size_type size2_; SCALARTYPE val_; viennacl::context ctx_; }; /** @brief Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initializer for viennacl::vector, vector_range, or vector_slize only. 
*/ template class scalar_matrix { public: typedef vcl_size_t size_type; typedef SCALARTYPE const & const_reference; scalar_matrix(size_type s1, size_type s2, const_reference val, viennacl::context ctx = viennacl::context()) : size1_(s1), size2_(s2), value_(val), ctx_(ctx) {} size_type size1() const { return size1_; } size_type size2() const { return size2_; } const_reference operator()(size_type /*i*/, size_type /*j*/) const { return value_; } viennacl::context context() const { return ctx_; } private: size_type size1_; size_type size2_; SCALARTYPE value_; viennacl::context ctx_; }; //#ifdef VIENNACL_WITH_OPENCL // template // rand::random_matrix_t random_matrix(unsigned int size1, unsigned int size2, DISTRIBUTION const & distribution){ // return rand::random_matrix_t(size1,size2,distribution); // } //#endif /** @brief Expression template class for representing a tree of expressions which ultimately result in a matrix. * * @tparam LHS The left hand side of the expression tree * @tparam RHS The right hand side of the expression tree * @tparam OP The operator to apply to LHS and RHS to obtain the result. 
*/ template class matrix_expression { typedef typename viennacl::result_of::reference_if_nonscalar::type lhs_reference_type; typedef typename viennacl::result_of::reference_if_nonscalar::type rhs_reference_type; public: typedef vcl_size_t size_type; matrix_expression(LHS & lhs, RHS & rhs) : lhs_(lhs), rhs_(rhs) {} /** @brief Get left hand side operand */ LHS & lhs() const { return lhs_; } /** @brief Get right hand side operand */ RHS & rhs() const { return rhs_; } /** @brief Returns the size of the result vector */ vcl_size_t size1() const { return viennacl::tools::MATRIX_SIZE_DEDUCER::size1(lhs_, rhs_); } vcl_size_t size2() const { return viennacl::tools::MATRIX_SIZE_DEDUCER::size2(lhs_, rhs_); } private: /** @brief The left hand side operand */ lhs_reference_type lhs_; /** @brief The right hand side operand */ rhs_reference_type rhs_; }; /** @brief A tag indicating iteration along increasing row index of a matrix */ struct row_iteration {}; /** @brief A tag indicating iteration along increasing columns index of a matrix */ struct col_iteration {}; //STL-like iterator. TODO: STL-compliance... /** @brief uBLAS-like iterator class for iterating over the entries of a dense matrix. 
*/ template class matrix_iterator { typedef matrix_iterator self_type; public: typedef typename MATRIXTYPE::value_type value_type; matrix_iterator(MATRIXTYPE & mat, vcl_size_t start_row, vcl_size_t start_col) : mat_(mat), row_(start_row), col_(start_col) {} value_type operator*(void) { return mat_(row_, col_); } self_type & operator++(void) { viennacl::tools::MATRIX_ITERATOR_INCREMENTER::apply(mat_, row_, col_); return *this; } self_type operator++(int) { self_type tmp = *this; ++(*this); return tmp; } bool operator==(self_type const & other) { return (row_ == other.row_) && (col_ == other.col_); } bool operator!=(self_type const & other) { return !(*this == other); } vcl_size_t index1() { return row_; } vcl_size_t index2() { return col_; } MATRIXTYPE & operator()(void) const { return mat_; } private: MATRIXTYPE & mat_; vcl_size_t row_; vcl_size_t col_; }; /** @brief A dense matrix class * * @tparam SCALARTYPE The underlying scalar type (either float or double) * @tparam F Storage layout: Either row_major or column_major (at present only row_major is supported) * @tparam ALIGNMENT The internal memory size is given by (size()/ALIGNMENT + 1) * ALIGNMENT. ALIGNMENT must be a power of two. Best values or usually 4, 8 or 16, higher values are usually a waste of memory. */ template class matrix_base { typedef matrix_base self_type; public: typedef matrix_iterator iterator1; typedef matrix_iterator iterator2; typedef scalar value_type; typedef SCALARTYPE cpu_value_type; typedef SizeType size_type; typedef DistanceType difference_type; typedef viennacl::backend::mem_handle handle_type; typedef F orientation_functor; typedef typename F::orientation_category orientation_category; static const size_type alignment = 128; /** @brief The default constructor. Does not allocate any memory. 
*/ explicit matrix_base() : size1_(0), size2_(0), start1_(0), start2_(0), stride1_(1), stride2_(1), internal_size1_(0), internal_size2_(0) {} /** @brief Creates the matrix with the given dimensions * * @param rows Number of rows * @param columns Number of columns * @param ctx Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host) */ explicit matrix_base(size_type rows, size_type columns, viennacl::context ctx = viennacl::context()) : size1_(rows), size2_(columns), start1_(0), start2_(0), stride1_(1), stride2_(1), internal_size1_(viennacl::tools::align_to_multiple(rows, alignment)), internal_size2_(viennacl::tools::align_to_multiple(columns, alignment)) { if (rows > 0 && columns > 0) { viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), ctx); clear(); } } /** @brief Constructor for creating a matrix_range or matrix_stride from some other matrix/matrix_range/matrix_stride */ explicit matrix_base(viennacl::backend::mem_handle & h, size_type mat_size1, size_type mat_start1, difference_type mat_stride1, size_type mat_internal_size1, size_type mat_size2, size_type mat_start2, difference_type mat_stride2, size_type mat_internal_size2) : size1_(mat_size1), size2_(mat_size2), start1_(mat_start1), start2_(mat_start2), stride1_(mat_stride1), stride2_(mat_stride2), internal_size1_(mat_internal_size1), internal_size2_(mat_internal_size2), elements_(h) {} template explicit matrix_base(matrix_expression const & proxy) : size1_(viennacl::traits::size1(proxy)), size2_(viennacl::traits::size2(proxy)), start1_(0), start2_(0), stride1_(1), stride2_(1), internal_size1_(viennacl::tools::align_to_multiple(size1_, alignment)), internal_size2_(viennacl::tools::align_to_multiple(size2_, alignment)) { elements_.switch_active_handle_id(viennacl::traits::active_handle_id(proxy)); if (internal_size() > 0) { viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), viennacl::traits::context(proxy)); 
clear(); self_type::operator=(proxy); } } // CUDA or host memory: explicit matrix_base(SCALARTYPE * ptr_to_mem, viennacl::memory_types mem_type, size_type mat_size1, size_type mat_start1, difference_type mat_stride1, size_type mat_internal_size1, size_type mat_size2, size_type mat_start2, difference_type mat_stride2, size_type mat_internal_size2) : size1_(mat_size1), size2_(mat_size2), start1_(mat_start1), start2_(mat_start2), stride1_(mat_stride1), stride2_(mat_stride2), internal_size1_(mat_internal_size1), internal_size2_(mat_internal_size2) { if (mem_type == viennacl::CUDA_MEMORY) { #ifdef VIENNACL_WITH_CUDA elements_.switch_active_handle_id(viennacl::CUDA_MEMORY); elements_.cuda_handle().reset(reinterpret_cast(ptr_to_mem)); elements_.cuda_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. #else throw cuda_not_available_exception(); #endif } else if (mem_type == viennacl::MAIN_MEMORY) { elements_.switch_active_handle_id(viennacl::MAIN_MEMORY); elements_.ram_handle().reset(reinterpret_cast(ptr_to_mem)); elements_.ram_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. } elements_.raw_size(sizeof(SCALARTYPE) * internal_size()); } #ifdef VIENNACL_WITH_OPENCL explicit matrix_base(cl_mem mem, size_type rows, size_type columns, viennacl::context ctx = viennacl::context()) : size1_(rows), size2_(columns), start1_(0), start2_(0), stride1_(1), stride2_(1), internal_size1_(rows), internal_size2_(columns) { elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY); elements_.opencl_handle() = mem; elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. 
elements_.opencl_handle().context(ctx.opencl_context()); elements_.raw_size(sizeof(SCALARTYPE)*internal_size()); } explicit matrix_base(cl_mem mem, viennacl::context ctx, size_type mat_size1, size_type mat_start1, difference_type mat_stride1, size_type mat_internal_size1, size_type mat_size2, size_type mat_start2, difference_type mat_stride2, size_type mat_internal_size2) : size1_(mat_size1), size2_(mat_size2), start1_(mat_start1), start2_(mat_start2), stride1_(mat_stride1), stride2_(mat_stride2), internal_size1_(mat_internal_size1), internal_size2_(mat_internal_size2) { elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY); elements_.opencl_handle() = mem; elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. elements_.opencl_handle().context(ctx.opencl_context()); elements_.raw_size(sizeof(SCALARTYPE)*internal_size()); } #endif self_type & operator=(const self_type & other) //enables implicit conversions { if (internal_size() == 0) { if (other.internal_size() == 0) return *this; resize(other.size1(), other.size2(), false); } viennacl::linalg::am(*this, other, cpu_value_type(1.0), 1, false, false); return *this; } /** @brief Creates the matrix from the supplied random matrix. */ /*template matrix(rand::random_matrix_t const & m) : rows_(m.size1), columns_(m.size2) { if (internal_size() > 0) { viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size()); rand::buffer_dumper::dump(elements_,m.distribution,0,internal_size()); } }*/ /** @brief Implementation of the operation m1 = m2 @ alpha, where @ denotes either multiplication or division, and alpha is either a CPU or a GPU scalar * * @param proxy An expression template proxy class. 
*/ template self_type & operator=(const matrix_expression & proxy) { assert( (viennacl::traits::size1(proxy) == size1() || size1() == 0) && (viennacl::traits::size2(proxy) == size2() || size2() == 0) && bool("Incompatible matrix sizes!")); if (internal_size() == 0 && viennacl::traits::size1(proxy) > 0 && viennacl::traits::size2(proxy) > 0) { size1_ = viennacl::traits::size1(proxy); size2_ = viennacl::traits::size2(proxy); internal_size1_ = viennacl::tools::align_to_multiple(size1_, alignment); internal_size2_ = viennacl::tools::align_to_multiple(size2_, alignment); viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), viennacl::traits::context(proxy)); if (size1_ != internal_size1_ || size2_ != internal_size2_) clear(); } if (internal_size() > 0) linalg::detail::op_executor >::apply(*this, proxy); return *this; } // A = trans(B). Currently achieved in CPU memory self_type & operator=(const matrix_expression< const self_type, const self_type, op_trans> & proxy) { assert( (handle() != proxy.lhs().handle()) && bool("Self-assignment of matrix transpose not implemented")); assert( ( (proxy.lhs().size1() == size2()) || (size2() == 0) ) && bool("Matrix dimensions do not match!")); assert( ( (proxy.lhs().size2() == size1()) || (size1() == 0) ) && bool("Matrix dimensions do not match!")); if (internal_size() == 0 && viennacl::traits::size1(proxy) > 0 && viennacl::traits::size2(proxy) > 0) { size1_ = viennacl::traits::size1(proxy); size2_ = viennacl::traits::size2(proxy); internal_size1_ = viennacl::tools::align_to_multiple(size1_, alignment); internal_size2_ = viennacl::tools::align_to_multiple(size2_, alignment); } std::vector temp(proxy.lhs().internal_size()); viennacl::backend::memory_read(proxy.lhs().handle(), 0, sizeof(SCALARTYPE)*proxy.lhs().internal_size(), &(temp[0])); // now transpose it std::vector temp_trans(internal_size()); for (vcl_size_t i=0; i self_type & operator+=(const matrix_expression & proxy) { assert( 
(viennacl::traits::size1(proxy) == size1()) && (viennacl::traits::size2(proxy) == size2()) && bool("Incompatible matrix sizes!")); assert( (size1() > 0) && bool("Vector not yet initialized!") ); assert( (size2() > 0) && bool("Vector not yet initialized!") ); linalg::detail::op_executor >::apply(*this, proxy); return *this; } template self_type & operator-=(const matrix_expression & proxy) { assert( (viennacl::traits::size1(proxy) == size1()) && (viennacl::traits::size2(proxy) == size2()) && bool("Incompatible matrix sizes!")); assert( (size1() > 0) && bool("Vector not yet initialized!") ); assert( (size2() > 0) && bool("Vector not yet initialized!") ); linalg::detail::op_executor >::apply(*this, proxy); return *this; } /** @brief Assigns the supplied identity matrix to the matrix. */ self_type & operator = (identity_matrix const & m) { assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") ); assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") ); if (internal_size() == 0) { size1_ = m.size1(); size2_ = m.size2(); internal_size1_ = viennacl::tools::align_to_multiple(size1_, alignment); internal_size2_ = viennacl::tools::align_to_multiple(size2_, alignment); if (internal_size() > 0) { viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), m.context()); clear(); } } else viennacl::linalg::matrix_assign(*this, SCALARTYPE(0)); if (internal_size() > 0) viennacl::linalg::matrix_diagonal_assign(*this, m(0,0)); return *this; } /** @brief Assigns the supplied zero matrix to the matrix. 
*/ self_type & operator = (zero_matrix const & m) { assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") ); assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") ); if (internal_size() == 0) { size1_ = m.size1(); size2_ = m.size2(); internal_size1_ = viennacl::tools::align_to_multiple(size1_, alignment); internal_size2_ = viennacl::tools::align_to_multiple(size2_, alignment); if (internal_size() > 0) { viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), m.context()); clear(); } } else viennacl::linalg::matrix_assign(*this, SCALARTYPE(0)); return *this; } /** @brief Assigns the supplied scalar vector to the matrix. */ self_type & operator = (scalar_matrix const & m) { assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") ); assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") ); if (internal_size() == 0) { size1_ = m.size1(); size2_ = m.size2(); internal_size1_ = viennacl::tools::align_to_multiple(size1_, alignment); internal_size2_ = viennacl::tools::align_to_multiple(size2_, alignment); if (internal_size() > 0) { viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), m.context()); clear(); } } if (internal_size() > 0) { viennacl::linalg::matrix_assign(*this, m(0,0)); } return *this; } //read-write access to an element of the matrix/matrix_range/matrix_slice /** @brief Read-write access to a single element of the matrix/matrix_range/matrix_slice */ entry_proxy operator()(size_type row_index, size_type col_index) { return entry_proxy(F::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_); } /** @brief Read access to a single element of the matrix/matrix_range/matrix_slice */ const_entry_proxy operator()(size_type row_index, size_type col_index) const { return const_entry_proxy(F::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), 
internal_size2()), elements_); } // // Operator overloads for enabling implicit conversions: // self_type & operator += (const self_type & other) { viennacl::linalg::ambm(*this, *this, SCALARTYPE(1.0), 1, false, false, other, SCALARTYPE(1.0), 1, false, false); return *this; } self_type & operator -= (const self_type & other) { viennacl::linalg::ambm(*this, *this, SCALARTYPE(1.0), 1, false, false, other, SCALARTYPE(1.0), 1, false, true); return *this; } /** @brief Scales a matrix by a CPU scalar value */ self_type & operator *= (SCALARTYPE val) { //viennacl::linalg::inplace_mult(*this, val); viennacl::linalg::am(*this, *this, val, 1, false, false); return *this; } /** @brief Scales this matrix by a CPU scalar value */ self_type & operator /= (SCALARTYPE val) { //viennacl::linalg::inplace_mult(*this, static_cast(1) / val); viennacl::linalg::am(*this, *this, val, 1, true, false); return *this; } /** @brief Sign flip for the matrix. Emulated to be equivalent to -1.0 * matrix */ matrix_expression operator-() const { return matrix_expression(*this, SCALARTYPE(-1)); } /** @brief Returns the number of rows */ size_type size1() const { return size1_;} /** @brief Returns the number of columns */ size_type size2() const { return size2_; } /** @brief Returns the number of rows */ size_type start1() const { return start1_;} /** @brief Returns the number of columns */ size_type start2() const { return start2_; } /** @brief Returns the number of rows */ size_type stride1() const { return stride1_;} /** @brief Returns the number of columns */ size_type stride2() const { return stride2_; } /** @brief Resets all entries to zero */ void clear() { viennacl::linalg::matrix_assign(*this, SCALARTYPE(0), true); } /** @brief Returns the internal number of rows. Usually required for launching OpenCL kernels only */ size_type internal_size1() const { return internal_size1_; } /** @brief Returns the internal number of columns. 
Usually required for launching OpenCL kernels only */ size_type internal_size2() const { return internal_size2_; } /** @brief Returns the total amount of allocated memory in multiples of sizeof(SCALARTYPE) */ size_type internal_size() const { return internal_size1() * internal_size2(); } /** @brief Returns the OpenCL handle, non-const-version */ handle_type & handle() { return elements_; } /** @brief Returns the OpenCL handle, const-version */ const handle_type & handle() const { return elements_; } viennacl::memory_types memory_domain() const { return elements_.get_active_handle_id(); } protected: void set_handle(viennacl::backend::mem_handle const & h) { elements_ = h; } void switch_memory_context(viennacl::context new_ctx) { viennacl::backend::switch_memory_context(elements_, new_ctx); } /** @brief Resizes the matrix. * Existing entries can be preserved, but * * @param rows New number of rows * @param columns New number of columns * @param preserve If true, existing values are preserved. 
*/ void resize(size_type rows, size_type columns, bool preserve = true) { assert( (rows > 0 && columns > 0) && bool("Check failed in matrix::resize(): Number of rows and columns must be positive!")); if (preserve && internal_size() > 0) { //get old entries: std::vector< SCALARTYPE > old_entries(internal_size()); viennacl::backend::memory_read(elements_, 0, sizeof(SCALARTYPE)*internal_size(), &(old_entries[0])); //set up entries of new matrix: std::vector< SCALARTYPE > new_entries( viennacl::tools::align_to_multiple(rows, alignment) * viennacl::tools::align_to_multiple(columns, alignment)); for (size_type i=0; i= size1_) continue; for (size_type j=0; j= size2_) continue; new_entries[F::mem_index(i, j, viennacl::tools::align_to_multiple(rows, alignment), viennacl::tools::align_to_multiple(columns, alignment))] = old_entries[F::mem_index(i, j, internal_size1(), internal_size2())]; } } //copy new entries to GPU: size1_ = rows; size2_ = columns; internal_size1_ = viennacl::tools::align_to_multiple(size1_, alignment); internal_size2_ = viennacl::tools::align_to_multiple(size2_, alignment); viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*new_entries.size(), viennacl::traits::context(elements_), &(new_entries[0])); } else //discard old entries: { size1_ = rows; size2_ = columns; internal_size1_ = viennacl::tools::align_to_multiple(size1_, alignment); internal_size2_ = viennacl::tools::align_to_multiple(size2_, alignment); viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), viennacl::traits::context(elements_)); clear(); } } private: size_type size1_; size_type size2_; size_type start1_; size_type start2_; difference_type stride1_; difference_type stride2_; size_type internal_size1_; size_type internal_size2_; handle_type elements_; }; //matrix /** @brief A dense matrix class * * @tparam SCALARTYPE The underlying scalar type (either float or double) * @tparam F Storage layout: Either row_major or column_major (at present only 
row_major is supported) * @tparam ALIGNMENT The internal memory size is given by (size()/ALIGNMENT + 1) * ALIGNMENT. ALIGNMENT must be a power of two. Best values or usually 4, 8 or 16, higher values are usually a waste of memory. */ template class matrix : public matrix_base { typedef matrix self_type; typedef matrix_base base_type; public: typedef typename base_type::size_type size_type; /** @brief The default constructor. Does not allocate any memory. */ explicit matrix() : base_type() {} /** @brief Creates the matrix with the given dimensions * * @param rows Number of rows * @param columns Number of columns * @param ctx Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host) */ explicit matrix(size_type rows, size_type columns, viennacl::context ctx = viennacl::context()) : base_type(rows, columns, ctx) {} #ifdef VIENNACL_WITH_OPENCL explicit matrix(cl_mem mem, size_type rows, size_type columns) : base_type(mem, rows, columns) {} #endif template matrix(matrix_expression< LHS, RHS, OP> const & proxy) : base_type(proxy) {} /** @brief Creates the matrix from the supplied identity matrix. */ matrix(identity_matrix const & m) : base_type(m.size1(), m.size2(), m.context()) { if (base_type::internal_size() > 0) base_type::operator=(m); } /** @brief Creates the matrix from the supplied zero matrix. */ matrix(zero_matrix const & m) : base_type(m.size1(), m.size2(), m.context()) { if (base_type::internal_size() > 0) base_type::operator=(m); } /** @brief Creates the matrix from the supplied scalar matrix. 
*/ matrix(scalar_matrix const & m) : base_type(m.size1(), m.size2(), m.context()) { if (base_type::internal_size() > 0) base_type::operator=(m); } matrix(const base_type & other) : base_type(other.size1(), other.size2(), viennacl::traits::context(other)) { base_type::operator=(other); } //copy constructor: matrix(const self_type & other) : base_type(other.size1(), other.size2(), viennacl::traits::context(other)) { base_type::operator=(other); } /*template self_type & operator=(const matrix_expression< const M1, const M1, op_trans> & proxy) { self_type temp(proxy.lhs()); *this = trans(temp); return *this; }*/ using base_type::operator=; /** @brief Resizes the matrix. * Existing entries can optionally be preserved * * @param rows New number of rows * @param columns New number of columns * @param preserve If true, existing values are preserved. */ void resize(size_type rows, size_type columns, bool preserve = true) { base_type::resize(rows, columns, preserve); } }; //matrix /** @brief Prints the matrix. Output is compatible to boost::numeric::ublas * * @param s STL output stream * @param gpu_matrix A dense ViennaCL matrix */ template std::ostream & operator<<(std::ostream & s, const matrix_base & gpu_matrix) { typedef typename matrix_base::size_type size_type; std::vector tmp(gpu_matrix.internal_size()); viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE) * gpu_matrix.internal_size(), &(tmp[0])); s << "[" << gpu_matrix.size1() << "," << gpu_matrix.size2() << "]"; s << "("; for (size_type i = 0; i < gpu_matrix.size1(); ++i) { s << "("; for (size_type j = 0; j < gpu_matrix.size2(); ++j) { s << tmp[F::mem_index(i * gpu_matrix.stride1() + gpu_matrix.start1(), j * gpu_matrix.stride2() + gpu_matrix.start2(), gpu_matrix.internal_size1(), gpu_matrix.internal_size2())]; if (j < gpu_matrix.size2() - 1) s << ","; } s << ")"; if (i < gpu_matrix.size1() - 1) s << ","; } s << ")"; return s; } /** @brief Prints the matrix. 
Output is compatible to boost::numeric::ublas * * @param s STL output stream * @param expr A matrix expression */ template std::ostream & operator<<(std::ostream & s, const matrix_expression & expr) { typedef typename viennacl::tools::CPU_SCALAR_TYPE_DEDUCER< typename tools::CONST_REMOVER::ResultType >::ResultType ScalarType; matrix temp = expr; s << temp; return s; } /** @brief Returns an expression template class representing a transposed matrix */ template matrix_expression< const matrix_base, const matrix_base, op_trans> trans(const matrix_base & mat) { return matrix_expression< const matrix_base, const matrix_base, op_trans>(mat, mat); } //diag(): template vector_expression< const matrix_base, const int, op_matrix_diag> diag(const matrix_base & A, int k = 0) { return vector_expression< const matrix_base, const int, op_matrix_diag>(A, k); } template matrix_expression< const vector_base, const int, op_vector_diag> diag(const vector_base & v, int k = 0) { return matrix_expression< const vector_base, const int, op_vector_diag>(v, k); } // row(): template vector_expression< const matrix_base, const unsigned int, op_row> row(const matrix_base & A, unsigned int i) { return vector_expression< const matrix_base, const unsigned int, op_row>(A, i); } // column(): template vector_expression< const matrix_base, const unsigned int, op_column> column(const matrix_base & A, unsigned int j) { return vector_expression< const matrix_base, const unsigned int, op_column>(A, j); } /////////////////////// transfer operations: ////////////////////////////////////// // //cpu to gpu, generic type: // /** @brief Copies a dense matrix from the host (CPU) to the OpenCL device (GPU or multi-core CPU) * * @param cpu_matrix A dense matrix on the host. Type requirements: .size1() returns number of rows, .size2() returns number of columns. 
Access to entries via operator() * @param gpu_matrix A dense ViennaCL matrix */ template void copy(const CPU_MATRIX & cpu_matrix, matrix & gpu_matrix ) { typedef typename matrix::size_type size_type; //std::cout << "Copying CPU_MATRIX!" << std::endl; //std::cout << "Size at begin: " << gpu_matrix.size1() << ", " << gpu_matrix.size2() << std::endl; if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0) { gpu_matrix.resize(cpu_matrix.size1(), cpu_matrix.size2(), false); } assert( (gpu_matrix.size1() == cpu_matrix.size1()) && (gpu_matrix.size2() == cpu_matrix.size2()) && bool("Matrix dimensions mismatch.") ); std::vector data(gpu_matrix.internal_size()); for (size_type i = 0; i < gpu_matrix.size1(); ++i) { for (size_type j = 0; j < gpu_matrix.size2(); ++j) data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j); } viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(SCALARTYPE) * data.size(), viennacl::traits::context(gpu_matrix), &(data[0])); //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data); //std::cout << "Size at end: " << gpu_matrix.size1() << ", " << gpu_matrix.size2() << std::endl; } // //cpu to gpu, STL type: // /** @brief Copies a dense STL-type matrix from the host (CPU) to the OpenCL device (GPU or multi-core CPU) * * @param cpu_matrix A dense matrix on the host of type std::vector< std::vector<> >. 
cpu_matrix[i][j] returns the element in the i-th row and j-th columns (both starting with zero) * @param gpu_matrix A dense ViennaCL matrix */ template void copy(const std::vector< std::vector, A2> & cpu_matrix, matrix & gpu_matrix ) { typedef typename matrix::size_type size_type; if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0) { gpu_matrix.resize(cpu_matrix.size(), cpu_matrix[0].size(), false); } assert( (gpu_matrix.size1() == cpu_matrix.size()) && bool("Matrix dimensions mismatch.") ); std::vector data(gpu_matrix.internal_size()); for (size_type i = 0; i < gpu_matrix.size1(); ++i) { assert( (gpu_matrix.size2() == cpu_matrix[i].size()) && bool("Matrix dimensions mismatch.") ); for (size_type j = 0; j < gpu_matrix.size2(); ++j) data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j]; } viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(SCALARTYPE) * data.size(), viennacl::traits::context(gpu_matrix), &(data[0])); //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data); } // //cpu to gpu, another STL type: // /** @brief Copies a dense matrix from the host (CPU) to the OpenCL device (GPU or multi-core CPU) without temporary. Matrix-Layout on CPU must be equal to the matrix-layout on the GPU. * * @param cpu_matrix_begin Pointer to the first matrix entry. Cf. iterator concept in STL * @param cpu_matrix_end Pointer past the last matrix entry. Cf. 
iterator concept in STL * @param gpu_matrix A dense ViennaCL matrix */ template void fast_copy(SCALARTYPE * cpu_matrix_begin, SCALARTYPE * cpu_matrix_end, matrix & gpu_matrix) { viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(SCALARTYPE) * (cpu_matrix_end - cpu_matrix_begin), viennacl::traits::context(gpu_matrix), cpu_matrix_begin); /*gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * (cpu_matrix_end - cpu_matrix_begin), cpu_matrix_begin);*/ } #ifdef VIENNACL_WITH_EIGEN /** @brief Copies a dense Eigen matrix from the host (CPU) to the OpenCL device (GPU or multi-core CPU) * * @param cpu_matrix A dense MTL matrix. cpu_matrix(i, j) returns the element in the i-th row and j-th columns (both starting with zero) * @param gpu_matrix A dense ViennaCL matrix */ template void copy(const Eigen::MatrixXf & cpu_matrix, matrix & gpu_matrix) { typedef typename matrix::size_type size_type; if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0) { gpu_matrix.resize(cpu_matrix.rows(), cpu_matrix.cols(), false); } else { assert( (gpu_matrix.size1() == static_cast(cpu_matrix.rows())) && (gpu_matrix.size2() == static_cast(cpu_matrix.cols())) && bool("matrix size mismatch") ); } std::vector data(gpu_matrix.internal_size()); for (size_type i = 0; i < gpu_matrix.size1(); ++i) { for (size_type j = 0; j < gpu_matrix.size2(); ++j) data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j); } viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(float) * data.size(), viennacl::traits::context(gpu_matrix), &(data[0])); //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data); } /** @brief Copies a dense Eigen matrix from the host (CPU) to the OpenCL device (GPU or multi-core CPU) * * @param cpu_matrix A dense MTL matrix. 
cpu_matrix(i, j) returns the element in the i-th row and j-th columns (both starting with zero) * @param gpu_matrix A dense ViennaCL matrix */ template void copy(const Eigen::MatrixXd & cpu_matrix, matrix & gpu_matrix) { typedef typename matrix::size_type size_type; if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0) { gpu_matrix.resize(cpu_matrix.rows(), cpu_matrix.cols(), false); } else { assert( (gpu_matrix.size1() == static_cast(cpu_matrix.rows())) && (gpu_matrix.size2() == static_cast(cpu_matrix.cols())) && bool("matrix size mismatch") ); } std::vector data(gpu_matrix.internal_size()); for (size_type i = 0; i < gpu_matrix.size1(); ++i) { for (size_type j = 0; j < gpu_matrix.size2(); ++j) data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j); } viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(double) * data.size(), viennacl::traits::context(gpu_matrix), &(data[0])); //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data); } #endif #ifdef VIENNACL_WITH_MTL4 /** @brief Copies a dense MTL matrix from the host (CPU) to the OpenCL device (GPU or multi-core CPU) * * @param cpu_matrix A dense MTL matrix. 
cpu_matrix(i, j) returns the element in the i-th row and j-th columns (both starting with zero) * @param gpu_matrix A dense ViennaCL matrix */ template void copy(const mtl::dense2D& cpu_matrix, matrix & gpu_matrix) { typedef typename matrix::size_type size_type; if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0) { gpu_matrix.resize(cpu_matrix.num_rows(), cpu_matrix.num_cols(), false); } else { assert( (gpu_matrix.size1() == cpu_matrix.num_rows()) && (gpu_matrix.size2() == cpu_matrix.num_cols()) && bool("matrix size mismatch") ); } std::vector data(gpu_matrix.internal_size()); for (size_type i = 0; i < gpu_matrix.size1(); ++i) { for (size_type j = 0; j < gpu_matrix.size2(); ++j) data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j]; } viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(SCALARTYPE) * data.size(), viennacl::traits::context(gpu_matrix), &(data[0])); //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data); } #endif // //gpu to cpu, generic type // /** @brief Copies a dense matrix from the OpenCL device (GPU or multi-core CPU) to the host (CPU). * * @param gpu_matrix A dense ViennaCL matrix * @param cpu_matrix A dense memory on the host. Must have at least as many rows and columns as the gpu_matrix! 
Type requirement: Access to entries via operator() */ template void copy(const matrix & gpu_matrix, CPU_MATRIX & cpu_matrix ) { typedef typename matrix::size_type size_type; if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) ) { assert( viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1() && bool("Matrix dimensions mismatch: rows")); std::vector temp_buffer(gpu_matrix.internal_size()); viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), &(temp_buffer[0])); //now copy entries to cpu_matrix: for (size_type i = 0; i < gpu_matrix.size1(); ++i) { assert( viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2() && bool("Matrix dimensions mismatch: columns")); for (size_type j = 0; j < gpu_matrix.size2(); ++j) cpu_matrix(i,j) = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())]; } } } //gpu to cpu, STL type /** @brief Copies a dense matrix from the OpenCL device (GPU or multi-core CPU) to the host (CPU). * * @param gpu_matrix A dense ViennaCL matrix * @param cpu_matrix A dense memory on the host using STL types, typically std::vector< std::vector<> > Must have at least as many rows and columns as the gpu_matrix! 
Type requirement: Access to entries via operator() */ template void copy(const matrix & gpu_matrix, std::vector< std::vector, A2> & cpu_matrix) { typedef typename matrix::size_type size_type; if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) ) { assert( (cpu_matrix.size() == gpu_matrix.size1()) && bool("Matrix dimensions mismatch: rows")); std::vector temp_buffer(gpu_matrix.internal_size()); viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), &(temp_buffer[0])); //now copy entries to cpu_matrix: for (size_type i = 0; i < gpu_matrix.size1(); ++i) { assert( (cpu_matrix[i].size() == gpu_matrix.size2()) && bool("Matrix dimensions mismatch: columns")); for (size_type j = 0; j < gpu_matrix.size2(); ++j) cpu_matrix[i][j] = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())]; } } } //gpu to cpu, STL type /** @brief Copies a dense matrix from the OpenCL device (GPU or multi-core CPU) to the host (CPU). * * @param gpu_matrix A dense ViennaCL matrix * @param cpu_matrix_begin Pointer to the output memory on the CPU. User must ensure that provided memory is large enough. */ template void fast_copy(const matrix & gpu_matrix, SCALARTYPE * cpu_matrix_begin) { viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), cpu_matrix_begin); } /////////////////////// matrix operator overloads to follow //////////////////////////////////////////// // operator + /** @brief Generic 'catch-all' overload, which enforces a temporary if the expression tree gets too deep. 
*/ template matrix_expression< const matrix_expression, const matrix_expression, op_add> operator + (matrix_expression const & proxy1, matrix_expression const & proxy2) { assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2)) && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2)) && bool("Incompatible matrix sizes!")); return matrix_expression< const matrix_expression, const matrix_expression, op_add>(proxy1, proxy2); } template matrix_expression< const matrix_expression, const matrix_base, op_add> operator + (matrix_expression const & proxy1, matrix_base const & proxy2) { assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2)) && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2)) && bool("Incompatible matrix sizes!")); return matrix_expression< const matrix_expression, const matrix_base, op_add>(proxy1, proxy2); } template matrix_expression< const matrix_base, const matrix_expression, op_add> operator + (matrix_base const & proxy1, matrix_expression const & proxy2) { assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2)) && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2)) && bool("Incompatible matrix sizes!")); return matrix_expression< const matrix_base, const matrix_expression, op_add>(proxy1, proxy2); } /** @brief Operator overload for m1 + m2, where m1 and m2 are either dense matrices, matrix ranges, or matrix slices. No mixing of different storage layouts allowed at the moment. 
*/ template matrix_expression< const matrix_base, const matrix_base, op_add > operator + (const matrix_base & m1, const matrix_base & m2) { return matrix_expression< const matrix_base, const matrix_base, op_add > (m1, m2); } // operator - template matrix_expression< const matrix_expression, const matrix_expression, op_sub> operator - (matrix_expression const & proxy1, matrix_expression const & proxy2) { assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2)) && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2)) && bool("Incompatible matrix sizes!")); return matrix_expression< const matrix_expression, const matrix_expression, op_sub>(proxy1, proxy2); } template matrix_expression< const matrix_expression, const matrix_base, op_sub> operator - (matrix_expression const & proxy1, matrix_base const & proxy2) { assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2)) && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2)) && bool("Incompatible matrix sizes!")); return matrix_expression< const matrix_expression, const matrix_base, op_sub>(proxy1, proxy2); } template matrix_expression< const matrix_base, const matrix_expression, op_sub> operator - (matrix_base const & proxy1, matrix_expression const & proxy2) { assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2)) && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2)) && bool("Incompatible matrix sizes!")); return matrix_expression< const matrix_base, const matrix_expression, op_sub>(proxy1, proxy2); } /** @brief Operator overload for m1 - m2, where m1 and m2 are either dense matrices, matrix ranges, or matrix slices. No mixing of different storage layouts allowed at the moment. 
*/ template matrix_expression< const matrix_base, const matrix_base, op_sub > operator - (const matrix_base & m1, const matrix_base & m2) { return matrix_expression< const matrix_base, const matrix_base, op_sub > (m1, m2); } // operator * /** @brief Operator overload for the expression alpha * m1, where alpha is a host scalar (float or double) and m1 is a ViennaCL matrix. * * @param value The host scalar (float or double) * @param m1 A ViennaCL matrix */ template typename viennacl::enable_if< viennacl::is_any_scalar::value, matrix_expression< const matrix_base, const S1, op_mult> >::type operator * (S1 const & value, matrix_base const & m1) { return matrix_expression< const matrix_base, const S1, op_mult>(m1, value); } /** @brief Operator overload for the multiplication of a matrix expression with a scalar from the right, e.g. (beta * m1) * alpha. Here, beta * m1 is wrapped into a matrix_expression and then multiplied with alpha from the right. * * @param proxy Left hand side matrix expression * @param val Right hand side scalar */ template typename viennacl::enable_if< viennacl::is_any_scalar::value, matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult> >::type operator * (matrix_expression< LHS, RHS, OP> const & proxy, S1 const & val) { return matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult>(proxy, val); } /** @brief Operator overload for the multiplication of a matrix expression with a ViennaCL scalar from the left, e.g. alpha * (beta * m1). Here, beta * m1 is wrapped into a matrix_expression and then multiplied with alpha from the left. 
* * @param val Right hand side scalar * @param proxy Left hand side matrix expression */ template typename viennacl::enable_if< viennacl::is_any_scalar::value, matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult> >::type operator * (S1 const & val, matrix_expression< LHS, RHS, OP> const & proxy) { return matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult>(proxy, val); } /** @brief Scales the matrix by a GPU scalar 'alpha' and returns an expression template */ template typename viennacl::enable_if< viennacl::is_any_scalar::value, matrix_expression< const matrix_base, const S1, op_mult> >::type operator * (matrix_base const & m1, S1 const & s1) { return matrix_expression< const matrix_base, const S1, op_mult>(m1, s1); } // operator *= /** @brief Scales a matrix by a GPU scalar value */ template typename viennacl::enable_if< viennacl::is_scalar::value, matrix_base & >::type operator *= (matrix_base & m1, S1 const & gpu_val) { //viennacl::linalg::inplace_mult(*this, gpu_val); viennacl::linalg::am(m1, m1, gpu_val, 1, false, (viennacl::is_flip_sign_scalar::value ? true : false)); return m1; } // operator / /** @brief Operator overload for the division of a matrix expression by a scalar from the right, e.g. (beta * m1) / alpha. Here, beta * m1 is wrapped into a matrix_expression and then divided by alpha. 
* * @param proxy Left hand side matrix expression * @param val Right hand side scalar */ template typename viennacl::enable_if< viennacl::is_any_scalar::value, matrix_expression< const matrix_expression, const S1, op_div> >::type operator / (matrix_expression const & proxy, S1 const & val) { return matrix_expression< const matrix_expression, const S1, op_div>(proxy, val); } /** @brief Returns an expression template for scaling the matrix by a GPU scalar 'alpha' */ template typename viennacl::enable_if< viennacl::is_any_scalar::value, matrix_expression< const matrix_base, const S1, op_div> >::type operator / (matrix_base const & m1, S1 const & s1) { return matrix_expression< const matrix_base, const S1, op_div>(m1, s1); } // operator /= /** @brief Scales a matrix by a GPU scalar value */ template typename viennacl::enable_if< viennacl::is_scalar::value, matrix_base & >::type operator /= (matrix_base & m1, S1 const & gpu_val) { //viennacl::linalg::inplace_divide(*this, gpu_val); viennacl::linalg::am(m1, m1, gpu_val, 1, true, (viennacl::is_flip_sign_scalar::value ? 
true : false)); return m1; } // outer_prod(v1, v2) * val; template typename viennacl::enable_if< viennacl::is_scalar::value, viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const S1, op_mult> >::type operator*(const viennacl::matrix_expression< const vector_base, const vector_base, op_prod> & proxy, const S1 & val) { return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const S1, op_mult>(proxy, val); } template typename viennacl::enable_if< viennacl::is_cpu_scalar::value, viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const NumericT, op_mult> >::type operator*(const viennacl::matrix_expression< const vector_base, const vector_base, op_prod> & proxy, const S1 & val) { return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const NumericT, op_mult>(proxy, NumericT(val)); } // val * outer_prod(v1, v2); template typename viennacl::enable_if< viennacl::is_scalar::value, viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const S1, op_mult> >::type operator*(const S1 & val, const viennacl::matrix_expression< const vector_base, const vector_base, op_prod> & proxy) { return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const S1, op_mult>(proxy, val); } template typename viennacl::enable_if< viennacl::is_cpu_scalar::value, viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const NumericT, op_mult> >::type operator*(const S1 & val, const viennacl::matrix_expression< const vector_base, const vector_base, op_prod> & proxy) { return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, 
op_prod>, const NumericT, op_mult>(proxy, NumericT(val)); } // // Specify available operations: // /** \cond */ namespace linalg { namespace detail { // x = y template struct op_executor, op_assign, matrix_base > { static void apply(matrix_base & lhs, matrix_base const & rhs) { viennacl::linalg::am(lhs, rhs, T(1), 1, false, false); } }; // x += y template struct op_executor, op_inplace_add, matrix_base > { static void apply(matrix_base & lhs, matrix_base const & rhs) { viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, rhs, T(1), 1, false, false); } }; // x -= y template struct op_executor, op_inplace_sub, matrix_base > { static void apply(matrix_base & lhs, matrix_base const & rhs) { viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, rhs, T(1), 1, false, true); } }; ///////////// x OP y * alpha //////////////////////// // x = alpha * y template struct op_executor, op_assign, matrix_expression, const ScalarType, op_mult> > { static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) { viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, false, false); } }; // x += alpha * y template struct op_executor, op_inplace_add, matrix_expression, const ScalarType, op_mult> > { static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) { viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, false, false); } }; // x -= alpha * y template struct op_executor, op_inplace_sub, matrix_expression, const ScalarType, op_mult> > { static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) { viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, false, true); } }; ///////////// x OP vec_expr * alpha //////////////////////// // x = alpha * vec_expr template struct op_executor, op_assign, matrix_expression, const ScalarType, op_mult> > { static void apply(matrix_base & lhs, matrix_expression, const 
ScalarType, op_mult> const & proxy) { matrix temp(proxy.lhs()); lhs = temp * proxy.rhs(); } }; // x += alpha * vec_expr template struct op_executor, op_inplace_add, matrix_expression, const ScalarType, op_mult> > { static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) { matrix temp(proxy.lhs()); lhs += temp * proxy.rhs(); } }; // x -= alpha * vec_expr template struct op_executor, op_inplace_sub, matrix_expression, const ScalarType, op_mult> > { static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) { matrix temp(proxy.lhs()); lhs -= temp * proxy.rhs(); } }; ///////////// x OP y / alpha //////////////////////// // x = y / alpha template struct op_executor, op_assign, matrix_expression, const ScalarType, op_div> > { static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) { viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, true, false); } }; // x += y / alpha template struct op_executor, op_inplace_add, matrix_expression, const ScalarType, op_div> > { static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) { viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, true, false); } }; // x -= y / alpha template struct op_executor, op_inplace_sub, matrix_expression, const ScalarType, op_div> > { static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) { viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, true, true); } }; ///////////// x OP vec_expr / alpha //////////////////////// // x = vec_expr / alpha template struct op_executor, op_assign, matrix_expression, const ScalarType, op_div> > { static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) { matrix temp(proxy.lhs()); lhs = temp / proxy.rhs(); } }; // x += vec_expr / alpha template struct op_executor, 
op_inplace_add, matrix_expression, const ScalarType, op_div> > { static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) { matrix temp(proxy.lhs()); lhs += temp / proxy.rhs(); } }; // x -= vec_expr / alpha template struct op_executor, op_inplace_sub, matrix_expression, const ScalarType, op_div> > { static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) { matrix temp(proxy.lhs()); lhs -= temp / proxy.rhs(); } }; // generic x = vec_expr1 + vec_expr2: template struct op_executor, op_assign, matrix_expression > { // generic x = vec_expr1 + vec_expr2: template static void apply(matrix_base & lhs, matrix_expression const & proxy) { bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs()); bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs()); if (op_aliasing_lhs || op_aliasing_rhs) { matrix_base temp(proxy.lhs()); op_executor, op_inplace_add, RHS>::apply(temp, proxy.rhs()); lhs = temp; } else { op_executor, op_assign, LHS>::apply(lhs, proxy.lhs()); op_executor, op_inplace_add, RHS>::apply(lhs, proxy.rhs()); } } // x = y + z static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_add> const & proxy) { viennacl::linalg::ambm(lhs, proxy.lhs(), T(1), 1, false, false, proxy.rhs(), T(1), 1, false, false); } // x = alpha * y + z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult>, const matrix_base, op_add> const & proxy) { viennacl::linalg::ambm(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, proxy.rhs(), T(1), 1, false, false); } // x = y / alpha + z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div>, const matrix_base, op_add> const & proxy) { viennacl::linalg::ambm(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, proxy.rhs(), T(1), 1, false, false); } // x = y + beta * z template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, const 
ScalarType, op_mult>, op_add> const & proxy) { viennacl::linalg::ambm(lhs, proxy.lhs(), T(1), 1, false, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false); } // x = y + z / beta template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, const ScalarType, op_div>, op_add> const & proxy) { viennacl::linalg::ambm(lhs, proxy.lhs(), T(1), 1, false, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false); } // x = alpha * y + beta * z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_mult>, const matrix_expression, const ScalarType2, op_mult>, op_add> const & proxy) { viennacl::linalg::ambm(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false); } // x = alpha * y + z / beta template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_mult>, const matrix_expression, const ScalarType2, op_div>, op_add> const & proxy) { viennacl::linalg::ambm(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false); } // x = y / alpha + beta * z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_div>, const matrix_expression, const ScalarType2, op_mult>, op_add> const & proxy) { viennacl::linalg::ambm(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false); } // x = y / alpha + z / beta template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_div>, const matrix_expression, const ScalarType2, op_div>, op_add> const & proxy) { viennacl::linalg::ambm(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false); } }; // dense = sparse * dense template struct op_executor, op_assign, matrix_expression > { template < typename SparseMatrixType, typename F2 > static void apply(matrix_base & lhs, 
matrix_expression, viennacl::op_prod> const & proxy) { viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), lhs); } // dense = sparse * trans(dense) template < typename SparseMatrixType, typename F2 > static void apply(matrix_base & lhs, matrix_expression, const viennacl::matrix_base, viennacl::op_trans >, viennacl::op_prod> const & proxy) { viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), lhs); } }; // generic x += vec_expr1 + vec_expr2: template struct op_executor, op_inplace_add, matrix_expression > { // generic x += vec_expr1 + vec_expr2: template static void apply(matrix_base & lhs, matrix_expression const & proxy) { bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs()); bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs()); if (op_aliasing_lhs || op_aliasing_rhs) { matrix_base temp(proxy.lhs()); op_executor, op_inplace_add, RHS>::apply(temp, proxy.rhs()); lhs += temp; } else { op_executor, op_inplace_add, LHS>::apply(lhs, proxy.lhs()); op_executor, op_inplace_add, RHS>::apply(lhs, proxy.rhs()); } } // x += y + z static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_add> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs(), T(1), 1, false, false, proxy.rhs(), T(1), 1, false, false); } // x += alpha * y + z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult>, const matrix_base, op_add> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, proxy.rhs(), T(1), 1, false, false); } // x += y / alpha + z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div>, const matrix_base, op_add> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, proxy.rhs(), T(1), 1, false, false); } // x += y + beta * z template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, const ScalarType, op_mult>, op_add> const & proxy) { 
viennacl::linalg::ambm_m(lhs, proxy.lhs(), T(1), 1, false, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false); } // x += y + z / beta template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, const ScalarType, op_div>, op_add> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs(), T(1), 1, false, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false); } // x += alpha * y + beta * z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_mult>, const matrix_expression, const ScalarType2, op_mult>, op_add> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false); } // x += alpha * y + z / beta template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_mult>, const matrix_expression, const ScalarType2, op_div>, op_add> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false); } // x += y / alpha + beta * z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_div>, const matrix_expression, const ScalarType2, op_mult>, op_add> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false); } // x += y / alpha + z / beta template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_div>, const matrix_expression, const ScalarType2, op_div>, op_add> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false); } }; // generic x -= vec_expr1 + vec_expr2: template struct op_executor, op_inplace_sub, matrix_expression > { // generic x -= vec_expr1 + vec_expr2: template static void apply(matrix_base & lhs, matrix_expression const 
& proxy) { bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs()); bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs()); if (op_aliasing_lhs || op_aliasing_rhs) { matrix_base temp(proxy.lhs()); op_executor, op_inplace_add, RHS>::apply(temp, proxy.rhs()); lhs -= temp; } else { op_executor, op_inplace_sub, LHS>::apply(lhs, proxy.lhs()); op_executor, op_inplace_sub, RHS>::apply(lhs, proxy.rhs()); } } // x -= y + z static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_add> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs(), T(1), 1, false, true, proxy.rhs(), T(1), 1, false, true); } // x -= alpha * y + z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult>, const matrix_base, op_add> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true, proxy.rhs(), T(1), 1, false, true); } // x -= y / alpha + z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div>, const matrix_base, op_add> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true, proxy.rhs(), T(1), 1, false, true); } // x -= y + beta * z template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, const ScalarType, op_mult>, op_add> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs(), T(1), 1, false, true, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true); } // x -= y + z / beta template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, const ScalarType, op_div>, op_add> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs(), T(1), 1, false, true, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true); } // x -= alpha * y + beta * z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_mult>, const matrix_expression, const ScalarType2, op_mult>, op_add> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), 
proxy.lhs().rhs(), 1, false, true, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true); } // x -= alpha * y + z / beta template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_mult>, const matrix_expression, const ScalarType2, op_div>, op_add> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true); } // x -= y / alpha + beta * z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_div>, const matrix_expression, const ScalarType2, op_mult>, op_add> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true); } // x -= y / alpha + z / beta template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_div>, const matrix_expression, const ScalarType2, op_div>, op_add> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true); } }; /////////////////////// // generic x = vec_expr1 - vec_expr2: template struct op_executor, op_assign, matrix_expression > { // generic x = vec_expr1 - vec_expr2: template static void apply(matrix_base & lhs, matrix_expression const & proxy) { bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs()); bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs()); if (op_aliasing_lhs || op_aliasing_rhs) { matrix_base temp(proxy.lhs()); op_executor, op_inplace_sub, RHS>::apply(temp, proxy.rhs()); lhs = temp; } else { op_executor, op_assign, LHS>::apply(lhs, proxy.lhs()); op_executor, op_inplace_sub, RHS>::apply(lhs, proxy.rhs()); } } // x = y - z static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_sub> const & proxy) { viennacl::linalg::ambm(lhs, proxy.lhs(), T(1), 1, false, false, proxy.rhs(), T(1), 1, false, true); } // x = alpha * y - z template static void 
apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult>, const matrix_base, op_sub> const & proxy) { viennacl::linalg::ambm(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, proxy.rhs(), T(1), 1, false, true); } // x = y / alpha - z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div>, const matrix_base, op_sub> const & proxy) { viennacl::linalg::ambm(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, proxy.rhs(), T(1), 1, false, true); } // x = y - beta * z template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, const ScalarType, op_mult>, op_sub> const & proxy) { viennacl::linalg::ambm(lhs, proxy.lhs(), T(1), 1, false, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true); } // x = y - z / beta template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, const ScalarType, op_div>, op_sub> const & proxy) { viennacl::linalg::ambm(lhs, proxy.lhs(), T(1), 1, false, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true); } // x = alpha * y - beta * z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_mult>, const matrix_expression, const ScalarType2, op_mult>, op_sub> const & proxy) { viennacl::linalg::ambm(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true); } // x = alpha * y - z / beta template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_mult>, const matrix_expression, const ScalarType2, op_div>, op_sub> const & proxy) { viennacl::linalg::ambm(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true); } // x = y / alpha - beta * z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_div>, const matrix_expression, const ScalarType2, op_mult>, op_sub> const & proxy) { viennacl::linalg::ambm(lhs, 
proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true); } // x = y / alpha - z / beta template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_div>, const matrix_expression, const ScalarType2, op_div>, op_sub> const & proxy) { viennacl::linalg::ambm(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true); } }; // generic x += vec_expr1 - vec_expr2: template struct op_executor, op_inplace_add, matrix_expression > { // generic x += vec_expr1 - vec_expr2: template static void apply(matrix_base & lhs, matrix_expression const & proxy) { bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs()); bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs()); if (op_aliasing_lhs || op_aliasing_rhs) { matrix_base temp(proxy.lhs()); op_executor, op_inplace_sub, RHS>::apply(temp, proxy.rhs()); lhs += temp; } else { op_executor, op_inplace_add, LHS>::apply(lhs, proxy.lhs()); op_executor, op_inplace_sub, RHS>::apply(lhs, proxy.rhs()); } } // x += y - z static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_sub> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs(), T(1), 1, false, false, proxy.rhs(), T(1), 1, false, true); } // x += alpha * y - z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult>, const matrix_base, op_sub> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, proxy.rhs(), T(1), 1, false, true); } // x += y / alpha - z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div>, const matrix_base, op_sub> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, proxy.rhs(), T(1), 1, false, true); } // x += y - beta * z template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, const ScalarType, op_mult>, op_sub> const & 
proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs(), T(1), 1, false, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true); } // x += y - z / beta template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, const ScalarType, op_div>, op_sub> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs(), T(1), 1, false, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true); } // x += alpha * y - beta * z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_mult>, const matrix_expression, const ScalarType2, op_mult>, op_sub> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true); } // x += alpha * y - z / beta template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_mult>, const matrix_expression, const ScalarType2, op_div>, op_sub> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true); } // x += y / alpha - beta * z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_div>, const matrix_expression, const ScalarType2, op_mult>, op_sub> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true); } // x += y / alpha - z / beta template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_div>, const matrix_expression, const ScalarType2, op_div>, op_sub> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true); } }; // generic x -= vec_expr1 - vec_expr2: template struct op_executor, op_inplace_sub, matrix_expression > { // generic x -= vec_expr1 - vec_expr2: template static void apply(matrix_base & lhs, matrix_expression 
const & proxy) { bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs()); bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs()); if (op_aliasing_lhs || op_aliasing_rhs) { matrix_base temp(proxy.lhs()); op_executor, op_inplace_sub, RHS>::apply(temp, proxy.rhs()); lhs -= temp; } else { op_executor, op_inplace_sub, LHS>::apply(lhs, proxy.lhs()); op_executor, op_inplace_add, RHS>::apply(lhs, proxy.rhs()); } } // x -= y - z static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_sub> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs(), T(1), 1, false, true, proxy.rhs(), T(1), 1, false, false); } // x -= alpha * y - z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult>, const matrix_base, op_sub> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true, proxy.rhs(), T(1), 1, false, false); } // x -= y / alpha - z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div>, const matrix_base, op_sub> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true, proxy.rhs(), T(1), 1, false, false); } // x -= y - beta * z template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, const ScalarType, op_mult>, op_sub> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs(), T(1), 1, false, true, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false); } // x -= y - z / beta template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, const ScalarType, op_div>, op_sub> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs(), T(1), 1, false, true, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false); } // x -= alpha * y - beta * z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_mult>, const matrix_expression, const ScalarType2, op_mult>, op_sub> const & proxy) { viennacl::linalg::ambm_m(lhs, 
proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false); } // x -= alpha * y - z / beta template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_mult>, const matrix_expression, const ScalarType2, op_div>, op_sub> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false); } // x -= y / alpha - beta * z template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_div>, const matrix_expression, const ScalarType2, op_mult>, op_sub> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false); } // x -= y / alpha - z / beta template static void apply(matrix_base & lhs, matrix_expression, const ScalarType1, op_div>, const matrix_expression, const ScalarType2, op_div>, op_sub> const & proxy) { viennacl::linalg::ambm_m(lhs, proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true, proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false); } }; //////////////////// diag(), row(), column() operations //////////////////////////////////////// template struct op_executor, op_assign, matrix_expression > { static void apply(matrix_base & lhs, matrix_expression, const int, op_vector_diag> const & proxy) { viennacl::linalg::matrix_diag_from_vector(proxy.lhs(), proxy.rhs(), lhs); } }; template struct op_executor, op_assign, vector_expression > { template static void apply(vector_base & lhs, vector_expression, const int, op_matrix_diag> const & proxy) { viennacl::linalg::matrix_diag_to_vector(proxy.lhs(), proxy.rhs(), lhs); } }; template struct op_executor, op_assign, vector_expression > { template static void apply(vector_base & lhs, vector_expression, const unsigned int, op_row> const & proxy) { viennacl::linalg::matrix_row(proxy.lhs(), proxy.rhs(), lhs); } }; template struct op_executor, 
op_assign, vector_expression > { template static void apply(vector_base & lhs, vector_expression, const unsigned int, op_column> const & proxy) { viennacl::linalg::matrix_column(proxy.lhs(), proxy.rhs(), lhs); } }; //////////////////// Element-wise operations //////////////////////////////////////// // generic x = mat_expr1 .* mat_expr2: template struct op_executor, op_assign, matrix_expression > > { // x = y .* z static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_element_binary > const & proxy) { viennacl::linalg::element_op(lhs, proxy); } // x = y .* mat_expr template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, op_element_binary > const & proxy) { matrix temp(proxy.rhs()); viennacl::linalg::element_op(lhs, viennacl::matrix_expression, const matrix_base, op_element_binary >(proxy.lhs(), temp)); } // x = mat_expr .* z template static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_element_binary > const & proxy) { matrix temp(proxy.lhs()); viennacl::linalg::element_op(lhs, viennacl::matrix_expression, const matrix_base, op_element_binary >(temp, proxy.rhs())); } // x = mat_expr .* mat_expr template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, op_element_binary > const & proxy) { matrix temp1(proxy.lhs()); matrix temp2(proxy.rhs()); viennacl::linalg::element_op(lhs, viennacl::matrix_expression, const matrix_base, op_element_binary >(temp1, temp2)); } }; // generic x += mat_expr .* mat_expr: template struct op_executor, op_inplace_add, matrix_expression > > { // x += y .* z static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_element_binary > const & proxy) { viennacl::matrix temp(proxy); lhs += temp; } // x += y .* mat_expr template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, op_element_binary > const & proxy) { matrix temp(proxy.rhs()); matrix temp2(temp.size1(), temp.size2()); 
viennacl::linalg::element_op(temp2, viennacl::matrix_expression, const matrix_base, op_element_binary >(proxy.lhs(), temp)); lhs += temp2; } // x += mat_expr .* z template static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_element_binary > const & proxy) { matrix temp(proxy.lhs()); matrix temp2(temp.size1(), temp.size2()); viennacl::linalg::element_op(temp2, viennacl::matrix_expression, const matrix_base, op_element_binary >(temp, proxy.rhs())); lhs += temp2; } // x += mat_expr .* mat_expr template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, op_element_binary > const & proxy) { matrix temp1(proxy.lhs()); matrix temp2(proxy.rhs()); matrix temp3(temp1.size1(), temp1.size2()); viennacl::linalg::element_op(temp3, viennacl::matrix_expression, const matrix_base, op_element_binary >(temp1, temp2)); lhs += temp3; } }; // generic x -= mat_expr1 .* mat_expr2: template struct op_executor, op_inplace_sub, matrix_expression > > { // x -= y .* z static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_element_binary > const & proxy) { viennacl::matrix temp(proxy); lhs -= temp; } // x -= y .* mat_expr template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, op_element_binary > const & proxy) { matrix temp(proxy.rhs()); matrix temp2(temp.size1(), temp.size2()); viennacl::linalg::element_op(temp2, viennacl::matrix_expression, const matrix_base, op_element_binary >(proxy.lhs(), temp)); lhs -= temp2; } // x -= mat_expr .* z template static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_element_binary > const & proxy) { matrix temp(proxy.lhs()); matrix temp2(temp.size1(), temp.size2()); viennacl::linalg::element_op(temp2, viennacl::matrix_expression, const matrix_base, op_element_binary >(temp, proxy.rhs())); lhs -= temp2; } // x -= mat_expr .* mat_expr template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, 
op_element_binary > const & proxy) { matrix temp1(proxy.lhs()); matrix temp2(proxy.rhs()); matrix temp3(temp1.size1(), temp1.size2()); viennacl::linalg::element_op(temp3, viennacl::matrix_expression, const matrix_base, op_element_binary >(temp1, temp2)); lhs -= temp3; } }; //////////////// unary expressions template struct op_executor, op_assign, matrix_expression > > { // x = OP(y) static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_element_unary > const & proxy) { viennacl::linalg::element_op(lhs, proxy); } // x = OP(vec_expr) template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, op_element_unary > const & proxy) { matrix temp(proxy.rhs()); viennacl::linalg::element_op(lhs, viennacl::matrix_expression, const matrix_base, op_element_unary >(temp, temp)); } }; template struct op_executor, op_inplace_add, matrix_expression > > { // x += OP(y) static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_element_unary > const & proxy) { matrix temp(proxy); lhs += temp; } // x += OP(vec_expr) template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, op_element_unary > const & proxy) { matrix temp(proxy.rhs()); viennacl::linalg::element_op(temp, viennacl::matrix_expression, const matrix_base, op_element_unary >(temp, temp)); // inplace operation is safe here lhs += temp; } }; template struct op_executor, op_inplace_sub, matrix_expression > > { // x -= OP(y) static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_element_unary > const & proxy) { matrix temp(proxy); lhs -= temp; } // x -= OP(vec_expr) template static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, op_element_unary > const & proxy) { matrix temp(proxy.rhs()); viennacl::linalg::element_op(temp, viennacl::matrix_expression, const matrix_base, op_element_unary >(temp, temp)); // inplace operation is safe here lhs -= temp; } }; //////////////// Matrix - 
Matrix products //////////////// // C = A * B template struct op_executor, op_assign, matrix_expression, const matrix_base, op_mat_mat_prod> > { static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_mat_mat_prod> const & rhs) { viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0)); } }; // C = A * B^T template struct op_executor, op_assign, matrix_expression, const matrix_expression, const matrix_base, op_trans>, op_mat_mat_prod> > { static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, const matrix_base, op_trans>, op_mat_mat_prod> const & rhs) { viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0)); } }; // C = A^T * B template struct op_executor, op_assign, matrix_expression, const matrix_base, op_trans>, const matrix_base, op_mat_mat_prod> > { static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_trans>, const matrix_base, op_mat_mat_prod> const & rhs) { viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0)); } }; // C = A^T * B^T template struct op_executor, op_assign, matrix_expression, const matrix_base, op_trans>, const matrix_expression, const matrix_base, op_trans>, op_mat_mat_prod> > { static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_trans>, const matrix_expression, const matrix_base, op_trans>, op_mat_mat_prod> const & rhs) { viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0)); } }; // C += A * B template struct op_executor, op_inplace_add, matrix_expression, const matrix_base, op_mat_mat_prod> > { static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_mat_mat_prod> const & rhs) { viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0)); } }; // C += A * B^T template struct op_executor, op_inplace_add, matrix_expression, const matrix_expression, const matrix_base, op_trans>, op_mat_mat_prod> > { static void apply(matrix_base & lhs, matrix_expression, 
const matrix_expression, const matrix_base, op_trans>, op_mat_mat_prod> const & rhs) { viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0)); } }; // C += A^T * B template struct op_executor, op_inplace_add, matrix_expression, const matrix_base, op_trans>, const matrix_base, op_mat_mat_prod> > { static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_trans>, const matrix_base, op_mat_mat_prod> const & rhs) { viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0)); } }; // C += A^T * B^T template struct op_executor, op_inplace_add, matrix_expression, const matrix_base, op_trans>, const matrix_expression, const matrix_base, op_trans>, op_mat_mat_prod> > { static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_trans>, const matrix_expression, const matrix_base, op_trans>, op_mat_mat_prod> const & rhs) { viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0)); } }; // C -= A * B template struct op_executor, op_inplace_sub, matrix_expression, const matrix_base, op_mat_mat_prod> > { static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_mat_mat_prod> const & rhs) { viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0)); } }; // C -= A * B^T template struct op_executor, op_inplace_sub, matrix_expression, const matrix_expression, const matrix_base, op_trans>, op_mat_mat_prod> > { static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, const matrix_base, op_trans>, op_mat_mat_prod> const & rhs) { viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0)); } }; // C -= A^T * B template struct op_executor, op_inplace_sub, matrix_expression, const matrix_base, op_trans>, const matrix_base, op_mat_mat_prod> > { static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_trans>, const matrix_base, op_mat_mat_prod> const & rhs) { viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), 
T(1.0)); } }; // C -= A^T * B^T template struct op_executor, op_inplace_sub, matrix_expression, const matrix_base, op_trans>, const matrix_expression, const matrix_base, op_trans>, op_mat_mat_prod> > { static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_trans>, const matrix_expression, const matrix_base, op_trans>, op_mat_mat_prod> const & rhs) { viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0)); } }; ////////////////// Matrix-Vector Products /////////////// // y = A * x template struct op_executor, op_assign, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { // check for x = A * x if (op_aliasing(lhs, rhs.rhs())) { vector_base temp(rhs); lhs = temp; } else viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs); } }; // y = A^T * x template struct op_executor, op_assign, vector_expression, const matrix_base, op_trans>, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const matrix_base, op_trans>, const vector_base, op_prod> const & rhs) { // check for x = A^T * x if (op_aliasing(lhs, rhs.rhs())) { vector_base temp(rhs); lhs = temp; } else viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs); } }; // y += A * x template struct op_executor, op_inplace_add, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { vector_base temp(rhs); lhs += temp; } }; // y += A^T * x template struct op_executor, op_inplace_add, vector_expression, const matrix_base, op_trans>, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const matrix_base, op_trans>, const vector_base, op_prod> const & rhs) { vector_base temp(rhs); lhs += temp; } }; // y -= A * x template struct op_executor, op_inplace_sub, vector_expression, const vector_base, op_prod> > { static void 
apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { vector_base temp(rhs); lhs -= temp; } }; // y -= A^T * x template struct op_executor, op_inplace_sub, vector_expression, const matrix_base, op_trans>, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const matrix_base, op_trans>, const vector_base, op_prod> const & rhs) { vector_base temp(rhs); lhs -= temp; } }; ////////////////// Rank-1 Updates /////////////// // A = v1 * v2^T template struct op_executor, op_assign, matrix_expression, const vector_base, op_prod> > { static void apply(matrix_base & lhs, matrix_expression, const vector_base, op_prod> const & rhs) { lhs.clear(); viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, false, rhs.lhs(), rhs.rhs()); } }; // A = alpha * v1 * v2^T template struct op_executor, op_assign, matrix_expression< const matrix_expression, const vector_base, op_prod>, const ScalarType, op_mult> > { static void apply(matrix_base & lhs, matrix_expression< const matrix_expression, const vector_base, op_prod>, const ScalarType, op_mult> const & rhs) { lhs.clear(); viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, false, rhs.lhs().lhs(), rhs.lhs().rhs()); } }; // A += v1 * v2^T template struct op_executor, op_inplace_add, matrix_expression, const vector_base, op_prod> > { static void apply(matrix_base & lhs, matrix_expression, const vector_base, op_prod> const & rhs) { viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, false, rhs.lhs(), rhs.rhs()); } }; // A += alpha * v1 * v2^T template struct op_executor, op_inplace_add, matrix_expression< const matrix_expression, const vector_base, op_prod>, const ScalarType, op_mult> > { static void apply(matrix_base & lhs, matrix_expression< const matrix_expression, const vector_base, op_prod>, const ScalarType, op_mult> const & rhs) { viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, false, rhs.lhs().lhs(), rhs.lhs().rhs()); } 
}; // A -= v1 * v2^T template struct op_executor, op_inplace_sub, matrix_expression, const vector_base, op_prod> > { static void apply(matrix_base & lhs, matrix_expression, const vector_base, op_prod> const & rhs) { viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, true, rhs.lhs(), rhs.rhs()); } }; // A -= alpha * v1 * v2^T template struct op_executor, op_inplace_sub, matrix_expression< const matrix_expression, const vector_base, op_prod>, const ScalarType, op_mult> > { static void apply(matrix_base & lhs, matrix_expression< const matrix_expression, const vector_base, op_prod>, const ScalarType, op_mult> const & rhs) { viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, true, rhs.lhs().lhs(), rhs.lhs().rhs()); } }; } // namespace detail } // namespace linalg /** \endcond */ } //namespace viennacl #endif ViennaCL-1.5.1-src/viennacl/fft.hpp000644 001750 001750 00000063676 12267307531 017077 0ustar00rupprupp000000 000000 #ifndef VIENNACL_FFT_HPP #define VIENNACL_FFT_HPP /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/fft.hpp @brief All routines related to the Fast Fourier Transform. Experimental. 
*/

// NOTE(review): four of the #include directives below carry no header name in this copy of the file;
//               their targets have to be restored before this header can compile.
#include
#include

#include "viennacl/linalg/opencl/kernels/fft.hpp"

#include
#include

namespace viennacl
{
namespace detail
{
namespace fft
{
// Largest transform length for which radix2() uses the single-launch "fft_radix2_local" kernel.
const vcl_size_t MAX_LOCAL_POINTS_NUM = 512;

// Selects which of the two matrix kernel programs the FFT routines below take their kernels from.
namespace FFT_DATA_ORDER
{
enum DATA_ORDER
{
  ROW_MAJOR,
  COL_MAJOR
};
}
}
}
}

/// @cond
namespace viennacl
{
namespace detail
{
namespace fft
{

/** @brief Returns true if data_size is a power of two. The sizes 0, 1 and 2 are always accepted. */
inline bool is_radix2(vcl_size_t data_size)
{
  // For data_size > 2, (data_size & (data_size - 1)) is nonzero exactly when more than one bit is set.
  return !((data_size > 2) && (data_size & (data_size - 1)));
}

/**
 * @brief Rounds n up to a power of two; a value that already is a power of two is returned unchanged.
 *
 * NOTE(review): for n == 0 the initial decrement wraps around if vcl_size_t is unsigned -- confirm that callers never pass 0.
 */
inline vcl_size_t next_power_2(vcl_size_t n)
{
  n = n - 1;

  // Copy the highest set bit of n into every lower bit position (shifts by 1, 2, 4, ... bits).
  vcl_size_t power = 1;
  while(power < sizeof(vcl_size_t) * 8)
  {
    n = n | (n >> power);
    power *= 2;
  }

  return n + 1;
}

/** @brief Returns the smallest b for which (1 << b) >= size; the result is 0 for size <= 1. */
inline vcl_size_t num_bits(vcl_size_t size)
{
  vcl_size_t bits_datasize = 0;
  vcl_size_t ds = 1;

  while(ds < size)
  {
    ds = ds << 1;
    bits_datasize++;
  }

  return bits_datasize;
}

/**
 * @brief Direct algorithm for computing Fourier transformation.
 *
 * Works on any sizes of data.
 * Serial implementation has O(n^2) complexity.
 *
 * Initializes the FFT kernels for the context of 'in', selects the matrix kernel program according to
 * data_order and enqueues its "fft_direct" kernel with (in, out, size, stride, batch_num, sign).
 *
 * @param in          OpenCL handle of the input buffer; its context is used for the kernel launch
 * @param out         OpenCL handle of the output buffer
 * @param size        Transform length handed to the kernel (the callers in this file pass a count of complex values)
 * @param stride      Handed to the kernel unchanged -- presumably the distance between consecutive batch items, TODO confirm against the kernel source
 * @param batch_num   Number of items in batch
 * @param sign        Sign of exponent, default is -1.0
 * @param data_order  ROW_MAJOR or COL_MAJOR; decides which matrix kernel program is initialized and used
 */
template
void direct(const viennacl::ocl::handle& in,
            const viennacl::ocl::handle& out,
            vcl_size_t size,
            vcl_size_t stride,
            vcl_size_t batch_num,
            SCALARTYPE sign = -1.0f,
            FFT_DATA_ORDER::DATA_ORDER data_order = FFT_DATA_ORDER::ROW_MAJOR
            )
{
  viennacl::ocl::context & ctx = const_cast(in.context());
  viennacl::linalg::opencl::kernels::fft::init(ctx);

  // Start from one program name and switch to the other program when column order is requested.
  std::string program_string = viennacl::linalg::opencl::kernels::matrix::program_name();
  if (data_order == FFT_DATA_ORDER::COL_MAJOR)
  {
    viennacl::linalg::opencl::kernels::matrix::init(ctx);
    program_string = viennacl::linalg::opencl::kernels::matrix::program_name();
  }
  else
    viennacl::linalg::opencl::kernels::matrix::init(ctx);

  viennacl::ocl::kernel& kernel = ctx.get_kernel(program_string, "fft_direct");
  viennacl::ocl::enqueue(kernel(in, out, static_cast(size), static_cast(stride), static_cast(batch_num), sign));
}

/*
 * This function performs reorder of input data. Indexes are sorted in bit-reversal order.
 * Such reordering should be done before in-place FFT.
*
 * Enqueues the "fft_reorder" kernel of the matrix kernel program selected by data_order
 * with the arguments (in, bits_datasize, size, stride, batch_num).
 *
 * in            - OpenCL handle of the buffer that is reordered in place; its context is used for the launch
 * size          - transform length handed to the kernel
 * stride        - handed to the kernel unchanged (presumably the distance between batch items -- TODO confirm)
 * bits_datasize - number of index bits; radix2() passes num_bits(size)
 * batch_num     - number of items in batch
 * data_order    - ROW_MAJOR or COL_MAJOR; decides which matrix kernel program is initialized and used
 */
template
void reorder(const viennacl::ocl::handle& in,
             vcl_size_t size,
             vcl_size_t stride,
             vcl_size_t bits_datasize,
             vcl_size_t batch_num,
             FFT_DATA_ORDER::DATA_ORDER data_order = FFT_DATA_ORDER::ROW_MAJOR
             )
{
  viennacl::ocl::context & ctx = const_cast(in.context());
  viennacl::linalg::opencl::kernels::fft::init(ctx);

  // Start from one program name and switch to the other program when column order is requested.
  std::string program_string = viennacl::linalg::opencl::kernels::matrix::program_name();
  if (data_order == FFT_DATA_ORDER::COL_MAJOR)
  {
    viennacl::linalg::opencl::kernels::matrix::init(ctx);
    program_string = viennacl::linalg::opencl::kernels::matrix::program_name();
  }
  else
    viennacl::linalg::opencl::kernels::matrix::init(ctx);

  viennacl::ocl::kernel& kernel = ctx.get_kernel(program_string, "fft_reorder");
  viennacl::ocl::enqueue(kernel(in,
                                static_cast(bits_datasize),
                                static_cast(size),
                                static_cast(stride),
                                static_cast(batch_num)
                               )
                        );
}

/**
 * @brief Radix-2 algorithm for computing Fourier transformation.
 *
 * Works only on power-of-two sizes of data.
 * Serial implementation has O(n * lg n) complexity.
* This is a Cooley-Tukey algorithm */ template void radix2(const viennacl::ocl::handle& in, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, SCALARTYPE sign = -1.0f, FFT_DATA_ORDER::DATA_ORDER data_order = FFT_DATA_ORDER::ROW_MAJOR ) { viennacl::ocl::context & ctx = const_cast(in.context()); viennacl::linalg::opencl::kernels::fft::init(ctx); assert(batch_num != 0); assert(is_radix2(size)); std::string program_string = viennacl::linalg::opencl::kernels::matrix::program_name(); if (data_order == FFT_DATA_ORDER::COL_MAJOR) { viennacl::linalg::opencl::kernels::matrix::init(ctx); program_string = viennacl::linalg::opencl::kernels::matrix::program_name(); } else viennacl::linalg::opencl::kernels::matrix::init(ctx); vcl_size_t bits_datasize = num_bits(size); if(size <= MAX_LOCAL_POINTS_NUM) { viennacl::ocl::kernel& kernel = ctx.get_kernel(program_string, "fft_radix2_local"); viennacl::ocl::enqueue(kernel(in, viennacl::ocl::local_mem((size * 4) * sizeof(SCALARTYPE)), static_cast(bits_datasize), static_cast(size), static_cast(stride), static_cast(batch_num), sign)); } else { reorder(in, size, stride, bits_datasize, batch_num); for(vcl_size_t step = 0; step < bits_datasize; step++) { viennacl::ocl::kernel& kernel = ctx.get_kernel(program_string, "fft_radix2"); viennacl::ocl::enqueue(kernel(in, static_cast(step), static_cast(bits_datasize), static_cast(size), static_cast(stride), static_cast(batch_num), sign)); } } } /** * @brief Bluestein's algorithm for computing Fourier transformation. * * Currently, Works only for sizes of input data which less than 2^16. * Uses a lot of additional memory, but should be fast for any size of data. 
* Serial implementation has roughly O(n * lg n) complexity.
 *
 * Steps as implemented: clear the work vectors A and B ("zero2" kernel), fill them from 'in'
 * ("bluestein_pre" kernel), combine them into Z with viennacl::linalg::convolve_i(), and write the
 * result to 'out' ("bluestein_post" kernel).
 *
 * @param in   Input vector; in.size() >> 1 is used as the transform length
 * @param out  Output vector
 *
 * The third parameter (batch_num) is unnamed and not used.
 */
template
void bluestein(viennacl::vector& in,
               viennacl::vector& out,
               vcl_size_t /*batch_num*/)
{
  viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(in).context());
  viennacl::linalg::opencl::kernels::fft::init(ctx);

  vcl_size_t size = in.size() >> 1;
  // Work length is a power of two, so the FFTs inside convolve_i() pass the is_radix2() test in inplace_fft().
  // NOTE(review): for an empty input, 2 * size - 1 wraps around if vcl_size_t is unsigned -- confirm callers never pass one.
  vcl_size_t ext_size = next_power_2(2 * size - 1);

  viennacl::vector A(ext_size << 1);
  viennacl::vector B(ext_size << 1);

  viennacl::vector Z(ext_size << 1);

  {
    viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft::program_name(), "zero2");
    viennacl::ocl::enqueue(kernel( A, B, static_cast(ext_size) ));
  }
  {
    viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft::program_name(), "bluestein_pre");
    viennacl::ocl::enqueue(kernel( in, A, B, static_cast(size), static_cast(ext_size) ));
  }

  // convolve_i() transforms A and B in place, so both are overwritten here.
  viennacl::linalg::convolve_i(A, B, Z);

  {
    viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft::program_name(), "bluestein_post");
    viennacl::ocl::enqueue(kernel( Z, out, static_cast(size) ));
  }
}

/** @brief Enqueues the "fft_mult_vec" kernel on (input1, input2, output) with the length input1.size() >> 1. The context of input1 is used. */
template
void multiply(viennacl::vector const & input1,
              viennacl::vector const & input2,
              viennacl::vector & output)
{
  viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(input1).context());
  viennacl::linalg::opencl::kernels::fft::init(ctx);
  vcl_size_t size = input1.size() >> 1;
  viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft::program_name(), "fft_mult_vec");
  viennacl::ocl::enqueue(kernel(input1, input2, output, static_cast(size)));
}

/**
 * @brief Enqueues the "fft_div_vec_scalar" kernel on 'input' with the divisor input.size() >> 1.
 *
 * The divisor is derived from the whole vector; the function has no batch parameter.
 */
template
void normalize(viennacl::vector & input)
{
  viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(input).context());
  viennacl::linalg::opencl::kernels::fft::init(ctx);
  viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft::program_name(), "fft_div_vec_scalar");
  vcl_size_t size = input.size() >> 1;
  SCALARTYPE norm_factor = static_cast(size);
  viennacl::ocl::enqueue(kernel(input, static_cast(size), norm_factor));
}

/** @brief Enqueues the "transpose_inplace" kernel on 'input' with internal_size1() and half of internal_size2(). */
template
void transpose(viennacl::matrix & input)
{
  viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(input).context());
  viennacl::linalg::opencl::kernels::fft::init(ctx);
  viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft::program_name(), "transpose_inplace");
  // Unlike the two-matrix overload below, the halving is applied after the cast here.
  viennacl::ocl::enqueue(kernel(input,
                                static_cast(input.internal_size1()),
                                static_cast(input.internal_size2()) >> 1));
}

/** @brief Enqueues the "transpose" kernel on (input, output) with internal_size1() and half of internal_size2() of 'input'. */
template
void transpose(viennacl::matrix const & input,
               viennacl::matrix & output)
{
  viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(input).context());
  viennacl::linalg::opencl::kernels::fft::init(ctx);

  viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft::program_name(), "transpose");
  viennacl::ocl::enqueue(kernel(input,
                                output,
                                static_cast(input.internal_size1()),
                                static_cast(input.internal_size2() >> 1))
                        );
}

/** @brief Enqueues the "real_to_complex" kernel on (in, out, size). The context of 'in' is used. */
template
void real_to_complex(viennacl::vector_base const & in,
                     viennacl::vector_base & out,
                     vcl_size_t size)
{
  viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(in).context());
  viennacl::linalg::opencl::kernels::fft::init(ctx);
  viennacl::ocl::kernel & kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft::program_name(), "real_to_complex");
  viennacl::ocl::enqueue(kernel(in, out, static_cast(size)));
}

/** @brief Enqueues the "complex_to_real" kernel on (in, out, size). The context of 'in' is used. */
template
void complex_to_real(viennacl::vector_base const & in,
                     viennacl::vector_base& out,
                     vcl_size_t size)
{
  viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(in).context());
  viennacl::linalg::opencl::kernels::fft::init(ctx);
  viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft::program_name(), "complex_to_real");
  viennacl::ocl::enqueue(kernel(in, out, static_cast(size)));
}

/** @brief Enqueues the "reverse_inplace" kernel on 'in' with the full length in.size(). */
template
void reverse(viennacl::vector_base& in)
{
  viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(in).context());
  viennacl::linalg::opencl::kernels::fft::init(ctx);
  vcl_size_t size = in.size();
  viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft::program_name(), "reverse_inplace");
  viennacl::ocl::enqueue(kernel(in, static_cast(size)));
}

} //namespace fft
} //namespace detail

/**
 * @brief Generic inplace version of 1-D Fourier transformation.
 *
 * If the per-item length is accepted by is_radix2(), radix2() works directly on 'input'; otherwise
 * direct() writes into a temporary vector that is copied back into 'input'.
 *
 * @param input       Input vector, result will be stored here.
 * @param batch_num   Number of items in batch; must not be 0 because it is used as a divisor
 * @param sign        Sign of exponent, default is -1.0
 */
template
void inplace_fft(viennacl::vector& input,
                 vcl_size_t batch_num = 1,
                 SCALARTYPE sign = -1.0)
{
  // Length of one batch item: half of the vector size, divided by the number of items.
  vcl_size_t size = (input.size() >> 1) / batch_num;

  if(!viennacl::detail::fft::is_radix2(size))
  {
    viennacl::vector output(input.size());
    viennacl::detail::fft::direct(viennacl::traits::opencl_handle(input),
                                  viennacl::traits::opencl_handle(output),
                                  size,
                                  size,
                                  batch_num,
                                  sign);

    viennacl::copy(output, input);
  }
  else
  {
    viennacl::detail::fft::radix2(viennacl::traits::opencl_handle(input), size, size, batch_num, sign);
  }
}

/**
 * @brief Generic version of 1-D Fourier transformation.
 *
 * If the per-item length is accepted by is_radix2(), 'input' is copied to 'output' and radix2() works
 * on 'output'; otherwise direct() reads 'input' and writes 'output'.
 *
 * @param input      Input vector.
 * @param output     Output vector.
 * @param batch_num  Number of items in batch; must not be 0 because it is used as a divisor
 * @param sign       Sign of exponent, default is -1.0
 */
template
void fft(viennacl::vector& input,
         viennacl::vector& output,
         vcl_size_t batch_num = 1,
         SCALARTYPE sign = -1.0
         )
{
  // Length of one batch item: half of the vector size, divided by the number of items.
  vcl_size_t size = (input.size() >> 1) / batch_num;

  if(viennacl::detail::fft::is_radix2(size))
  {
    viennacl::copy(input, output);
    viennacl::detail::fft::radix2(viennacl::traits::opencl_handle(output), size, size, batch_num, sign);
  }
  else
  {
    viennacl::detail::fft::direct(viennacl::traits::opencl_handle(input),
                                  viennacl::traits::opencl_handle(output),
                                  size,
                                  size,
                                  batch_num,
                                  sign);
  }
}

/**
 * @brief Generic inplace version of 2-D Fourier transformation.
 *
 * @param input       Input matrix, result will be stored here.
* @param sign        Sign of exponent, default is -1.0
 *
 * Two passes are made over 'input': first with ROW_MAJOR order (length cols_num, rows_num batch items),
 * then with COL_MAJOR order (length rows_num, cols_num batch items). A pass whose length is not accepted
 * by is_radix2() goes through direct() into a temporary matrix that is assigned back to 'input'.
 */
template
void inplace_fft(viennacl::matrix& input,
                 SCALARTYPE sign = -1.0)
{
  vcl_size_t rows_num = input.size1();
  vcl_size_t cols_num = input.size2() >> 1;

  // Half of the internal row length; handed to the kernels as stride in both passes.
  vcl_size_t cols_int = input.internal_size2() >> 1;

  // batch with rows
  if(viennacl::detail::fft::is_radix2(cols_num))
  {
    viennacl::detail::fft::radix2(viennacl::traits::opencl_handle(input), cols_num, cols_int, rows_num, sign, viennacl::detail::fft::FFT_DATA_ORDER::ROW_MAJOR);
  }
  else
  {
    viennacl::matrix output(input.size1(), input.size2());

    viennacl::detail::fft::direct(viennacl::traits::opencl_handle(input),
                                  viennacl::traits::opencl_handle(output),
                                  cols_num,
                                  cols_int,
                                  rows_num,
                                  sign,
                                  viennacl::detail::fft::FFT_DATA_ORDER::ROW_MAJOR
                                  );

    input = output;
  }

  // batch with cols
  if (viennacl::detail::fft::is_radix2(rows_num))
  {
    viennacl::detail::fft::radix2(viennacl::traits::opencl_handle(input), rows_num, cols_int, cols_num, sign, viennacl::detail::fft::FFT_DATA_ORDER::COL_MAJOR);
  }
  else
  {
    viennacl::matrix output(input.size1(), input.size2());

    viennacl::detail::fft::direct(viennacl::traits::opencl_handle(input),
                                  viennacl::traits::opencl_handle(output),
                                  rows_num,
                                  cols_int,
                                  cols_num,
                                  sign,
                                  viennacl::detail::fft::FFT_DATA_ORDER::COL_MAJOR);

    input = output;
  }
}

/**
 * @brief Generic version of 2-D Fourier transformation.
 *
 * @param input      Input matrix.
 * @param output     Output matrix.
* @param sign      Sign of exponent, default is -1.0
 *
 * Two passes are made: first with ROW_MAJOR order from 'input' into 'output' (length cols_num, rows_num
 * batch items), then with COL_MAJOR order on 'output' (length rows_num, cols_num batch items). In the
 * second pass the non-radix2 path reads from a temporary copy of 'output'.
 */
template
void fft(viennacl::matrix& input,
         viennacl::matrix& output,
         SCALARTYPE sign = -1.0)
{
  vcl_size_t rows_num = input.size1();
  vcl_size_t cols_num = input.size2() >> 1;

  // Half of the internal row length; handed to the kernels as stride in both passes.
  vcl_size_t cols_int = input.internal_size2() >> 1;

  // batch with rows
  if(viennacl::detail::fft::is_radix2(cols_num))
  {
    // radix2() works in place, so it is run on a copy of the input.
    output = input;
    viennacl::detail::fft::radix2(viennacl::traits::opencl_handle(output), cols_num, cols_int, rows_num, sign, viennacl::detail::fft::FFT_DATA_ORDER::ROW_MAJOR);
  }
  else
  {
    viennacl::detail::fft::direct(viennacl::traits::opencl_handle(input),
                                  viennacl::traits::opencl_handle(output),
                                  cols_num,
                                  cols_int,
                                  rows_num,
                                  sign,
                                  viennacl::detail::fft::FFT_DATA_ORDER::ROW_MAJOR
                                  );
  }

  // batch with cols
  if(viennacl::detail::fft::is_radix2(rows_num))
  {
    viennacl::detail::fft::radix2(viennacl::traits::opencl_handle(output), rows_num, cols_int, cols_num, sign, viennacl::detail::fft::FFT_DATA_ORDER::COL_MAJOR);
  }
  else
  {
    // direct() needs distinct source and destination buffers, so the first-pass result is copied aside.
    viennacl::matrix tmp(output.size1(), output.size2());
    tmp = output;

    viennacl::detail::fft::direct(viennacl::traits::opencl_handle(tmp),
                                  viennacl::traits::opencl_handle(output),
                                  rows_num,
                                  cols_int,
                                  cols_num,
                                  sign,
                                  viennacl::detail::fft::FFT_DATA_ORDER::COL_MAJOR);
  }
}

/**
 * @brief Generic inplace version of inverse 1-D Fourier transformation.
 *
 * Shorthand function for inplace_fft(sign = 1.0), followed by detail::fft::normalize().
 *
 * NOTE(review): normalize() divides by input.size() >> 1, i.e. by the length of the whole vector and not
 *               by the per-item length (input.size() >> 1) / batch_num -- confirm this is intended for batch_num > 1.
 *
 * @param input      Input vector, result will be stored here.
 * @param batch_num  Number of items in batch.
 */
template
void inplace_ifft(viennacl::vector& input,
                  vcl_size_t batch_num = 1)
{
  viennacl::inplace_fft(input, batch_num, SCALARTYPE(1.0));
  viennacl::detail::fft::normalize(input);
}

/**
 * @brief Generic version of inverse 1-D Fourier transformation.
 *
 * Shorthand function for fft(sign = 1.0)
 *
 * @param input      Input vector.
 * @param output     Output vector.
 * @param batch_num  Number of items in batch.
* @param sign Sign of exponent, default is -1.0 */ template void ifft(viennacl::vector& input, viennacl::vector& output, vcl_size_t batch_num = 1 ) { viennacl::fft(input, output, batch_num, SCALARTYPE(1.0)); viennacl::detail::fft::normalize(output); } namespace linalg { /** * @brief 1-D convolution of two vectors. * * This function does not make any changes to input vectors * * @param input1 Input vector #1. * @param input2 Input vector #2. * @param output Output vector. */ template void convolve(viennacl::vector& input1, viennacl::vector& input2, viennacl::vector& output ) { assert(input1.size() == input2.size()); assert(input1.size() == output.size()); //temporal arrays viennacl::vector tmp1(input1.size()); viennacl::vector tmp2(input2.size()); viennacl::vector tmp3(output.size()); // align input arrays to equal size // FFT of input data viennacl::fft(input1, tmp1); viennacl::fft(input2, tmp2); // multiplication of input data viennacl::detail::fft::multiply(tmp1, tmp2, tmp3); // inverse FFT of input data viennacl::ifft(tmp3, output); } /** * @brief 1-D convolution of two vectors. * * This function can make changes to input vectors to avoid additional memory allocations. * * @param input1 Input vector #1. * @param input2 Input vector #2. * @param output Output vector. 
*/ template void convolve_i(viennacl::vector& input1, viennacl::vector& input2, viennacl::vector& output ) { assert(input1.size() == input2.size()); assert(input1.size() == output.size()); viennacl::inplace_fft(input1); viennacl::inplace_fft(input2); viennacl::detail::fft::multiply(input1, input2, output); viennacl::inplace_ifft(output); } } } //namespace linalg /// @endcond #endif ViennaCL-1.5.1-src/viennacl/toeplitz_matrix.hpp000644 001750 001750 00000035124 12267307531 021541 0ustar00rupprupp000000 000000 #ifndef VIENNACL_TOEPLITZ_MATRIX_HPP #define VIENNACL_TOEPLITZ_MATRIX_HPP /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file toeplitz_matrix.hpp @brief Implementation of the toeplitz_matrix class for efficient manipulation of Toeplitz matrices. Experimental. */ #include "viennacl/forwards.h" #include "viennacl/vector.hpp" #include "viennacl/ocl/backend.hpp" #include "viennacl/fft.hpp" #include "viennacl/linalg/toeplitz_matrix_operations.hpp" namespace viennacl { /** @brief A Toeplitz matrix class * * @tparam SCALARTYPE The underlying scalar type (either float or double) * @tparam ALIGNMENT The internal memory size is given by (size()/ALIGNMENT + 1) * ALIGNMENT. ALIGNMENT must be a power of two. Best values or usually 4, 8 or 16, higher values are usually a waste of memory. 
*/ template class toeplitz_matrix { public: typedef viennacl::backend::mem_handle handle_type; typedef scalar::ResultType> value_type; /** * @brief The default constructor. Does not allocate any memory. * */ explicit toeplitz_matrix() {} /** @brief Creates the matrix with the given size * * @param rows Number of rows of the matrix * @param cols Number of columns of the matrix */ explicit toeplitz_matrix(vcl_size_t rows, vcl_size_t cols) : elements_(rows * 2) { assert(rows == cols && bool("Toeplitz matrix must be square!")); (void)cols; // avoid 'unused parameter' warning in optimized builds } /** @brief Resizes the matrix. * Existing entries can be preserved * * @param sz New size of matrix * @param preserve If true, existing values are preserved. */ void resize(vcl_size_t sz, bool preserve = true) { elements_.resize(sz * 2, preserve); } /** @brief Returns the OpenCL handle * * @return OpenCL handle */ handle_type const & handle() const { return elements_.handle(); } /** * @brief Returns an internal viennacl::vector, which represents a Toeplitz matrix elements * */ viennacl::vector & elements() { return elements_; } viennacl::vector const & elements() const { return elements_; } /** * @brief Returns the number of rows of the matrix */ vcl_size_t size1() const { return elements_.size() / 2; } /** * @brief Returns the number of columns of the matrix */ vcl_size_t size2() const { return elements_.size() / 2; } /** @brief Returns the internal size of matrix representtion. 
* Usually required for launching OpenCL kernels only * * @return Internal size of matrix representation */ vcl_size_t internal_size() const { return elements_.internal_size(); } /** * @brief Read-write access to a single element of the matrix * * @param row_index Row index of accessed element * @param col_index Column index of accessed element * @return Proxy for matrix entry */ entry_proxy operator()(vcl_size_t row_index, vcl_size_t col_index) { assert(row_index < size1() && col_index < size2() && bool("Invalid access")); long index = static_cast(col_index) - static_cast(row_index); if (index < 0) index = -index; else if (index > 0) index = 2 * static_cast(size1()) - index; return elements_[index]; } /** * @brief += operation for Toeplitz matrices * * @param that Matrix which will be added * @return Result of addition */ toeplitz_matrix& operator +=(toeplitz_matrix& that) { elements_ += that.elements(); return *this; } private: toeplitz_matrix(toeplitz_matrix const &) {} toeplitz_matrix & operator=(toeplitz_matrix const & t); viennacl::vector elements_; }; /** @brief Copies a Toeplitz matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) * * * @param cpu_vec A std::vector on the host. 
* @param gpu_mat A toeplitz_matrix from ViennaCL */ template void copy(std::vector const & cpu_vec, toeplitz_matrix& gpu_mat) { assert( (gpu_mat.size1() == 0 || (gpu_mat.size1() * 2 - 1) == cpu_vec.size()) && bool("Size mismatch")); vcl_size_t size = gpu_mat.size1(); std::vector rvrs(cpu_vec.size()); std::copy(cpu_vec.begin(), cpu_vec.end(), rvrs.begin()); std::reverse(rvrs.begin(), rvrs.end()); std::vector tmp(size * 2); std::copy(rvrs.begin() + size - 1, rvrs.end(), tmp.begin()); std::copy(rvrs.begin(), rvrs.begin() + size - 1, tmp.begin() + size + 1); tmp[size] = 0.0; copy(tmp, gpu_mat.elements()); } /** @brief Copies a Toeplitz matrix from the OpenCL device (either GPU or multi-core CPU) to the std::vector * * * @param gpu_mat A toeplitz_matrix from ViennaCL * @param cpu_vec A std::vector on the host. */ template void copy(toeplitz_matrix const & gpu_mat, std::vector & cpu_vec) { assert((gpu_mat.size1() * 2 - 1) == cpu_vec.size() && bool("Size mismatch")); vcl_size_t size = gpu_mat.size1(); std::vector tmp(size * 2); copy(gpu_mat.elements(), tmp); std::reverse(tmp.begin(), tmp.end()); std::copy(tmp.begin(), tmp.begin() + size - 1, cpu_vec.begin() + size); std::copy(tmp.begin() + size, tmp.end(), cpu_vec.begin()); } /** @brief Copies a Toeplitz matrix from the OpenCL device (either GPU or multi-core CPU) to the matrix-like object * * * @param tep_src A toeplitz_matrix from ViennaCL * @param com_dst A matrix-like object */ template void copy(toeplitz_matrix const & tep_src, MATRIXTYPE & com_dst) { assert(tep_src.size1() == viennacl::traits::size1(com_dst) && bool("Size mismatch")); assert(tep_src.size2() == viennacl::traits::size2(com_dst) && bool("Size mismatch")); vcl_size_t size = tep_src.size1(); std::vector tmp(tep_src.size1() * 2 - 1); copy(tep_src, tmp); for(vcl_size_t i = 0; i < size; i++) for(vcl_size_t j = 0; j < size; j++) com_dst(i, j) = tmp[static_cast(j) - static_cast(i) + static_cast(size) - 1]; } /** @brief Copies a the matrix-like object to the 
Toeplitz matrix from the OpenCL device (either GPU or multi-core CPU) * * * @param com_src A std::vector on the host * @param tep_dst A toeplitz_matrix from ViennaCL */ template void copy(MATRIXTYPE const & com_src, toeplitz_matrix& tep_dst) { assert( (tep_dst.size1() == 0 || tep_dst.size1() == viennacl::traits::size1(com_src)) && bool("Size mismatch")); assert( (tep_dst.size2() == 0 || tep_dst.size2() == viennacl::traits::size2(com_src)) && bool("Size mismatch")); vcl_size_t size = tep_dst.size1(); std::vector tmp(2*size - 1); for(long i = static_cast(size) - 1; i >= 0; i--) tmp[size - i - 1] = com_src(i, 0); for(vcl_size_t i = 1; i < size; i++) tmp[size + i - 1] = com_src(0, i); copy(tmp, tep_dst); } /*template void prod_impl(toeplitz_matrix& mat, vector& vec, vector& result) { viennacl::vector tep(mat.elements().size() * 2); fft::real_to_complex(mat.elements(), tep, mat.elements().size()); viennacl::vector tmp(vec.size() * 4); viennacl::vector tmp2(vec.size() * 4); tmp.clear(); copy(vec, tmp); fft::real_to_complex(tmp, tmp2, vec.size() * 2); fft::convolve(tep, tmp2, tmp); fft::complex_to_real(tmp, tmp2, vec.size() * 2); copy(tmp2.begin(), tmp2.begin() + vec.size(), result.begin()); }*/ /** @brief Prints the matrix. 
Output is compatible to boost::numeric::ublas * * @param s STL output stream * @param gpu_matrix A ViennaCL Toeplitz matrix */ template std::ostream & operator<<(std::ostream & s, toeplitz_matrix& gpu_matrix) { vcl_size_t size = gpu_matrix.size1(); std::vector tmp(2*size - 1); copy(gpu_matrix, tmp); s << "[" << size << "," << size << "]("; for(vcl_size_t i = 0; i < size; i++) { s << "("; for(vcl_size_t j = 0; j < size; j++) { s << tmp[static_cast(j) - static_cast(i) + static_cast(size - 1)]; //s << (int)i - (int)j; if(j < (size - 1)) s << ","; } s << ")"; } s << ")"; return s; } // // Specify available operations: // /** \cond */ namespace linalg { namespace detail { // x = A * y template struct op_executor, op_assign, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { // check for the special case x = A * x if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs = temp; } else viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs); } }; template struct op_executor, op_inplace_add, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs += temp; } }; template struct op_executor, op_inplace_sub, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs -= temp; } }; // x = A * vec_op template struct op_executor, op_assign, vector_expression, const vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_expression, op_prod> const & rhs) { viennacl::vector 
temp(rhs.rhs()); viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs); } }; // x = A * vec_op template struct op_executor, op_inplace_add, vector_expression, vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs()); viennacl::vector temp_result(lhs); viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); lhs += temp_result; } }; // x = A * vec_op template struct op_executor, op_inplace_sub, vector_expression, const vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs()); viennacl::vector temp_result(lhs); viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); lhs -= temp_result; } }; } // namespace detail } // namespace linalg /** \endcond */ } #endif // VIENNACL_TOEPLITZ_MATRIX_HPP ViennaCL-1.5.1-src/viennacl/meta/000755 001750 001750 00000000000 12267307531 016513 5ustar00rupprupp000000 000000 ViennaCL-1.5.1-src/viennacl/meta/predicate.hpp000644 001750 001750 00000034215 12267307531 021171 0ustar00rupprupp000000 000000 #ifndef VIENNACL_META_PREDICATE_HPP_ #define VIENNACL_META_PREDICATE_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file predicate.hpp @brief All the predicates used within ViennaCL. Checks for expressions to be vectors, etc. 
*/ #include #include #include #include "viennacl/forwards.h" #ifdef VIENNACL_WITH_OPENCL #ifdef __APPLE__ #include #else #include "CL/cl.h" #endif #endif namespace viennacl { // // is_cpu_scalar: checks for float or double // //template //struct is_cpu_scalar //{ // enum { value = false }; //}; /** \cond */ template <> struct is_cpu_scalar { enum { value = true }; }; template <> struct is_cpu_scalar { enum { value = true }; }; template <> struct is_cpu_scalar { enum { value = true }; }; template <> struct is_cpu_scalar { enum { value = true }; }; template <> struct is_cpu_scalar { enum { value = true }; }; template <> struct is_cpu_scalar { enum { value = true }; }; template <> struct is_cpu_scalar { enum { value = true }; }; template <> struct is_cpu_scalar { enum { value = true }; }; template <> struct is_cpu_scalar { enum { value = true }; }; template <> struct is_cpu_scalar { enum { value = true }; }; /** \endcond */ // // is_scalar: checks for viennacl::scalar // //template //struct is_scalar //{ // enum { value = false }; //}; /** \cond */ template struct is_scalar > { enum { value = true }; }; /** \endcond */ // // is_flip_sign_scalar: checks for viennacl::scalar modified with unary operator- // //template //struct is_flip_sign_scalar //{ // enum { value = false }; //}; /** \cond */ template struct is_flip_sign_scalar, const scalar, op_flip_sign> > { enum { value = true }; }; /** \endcond */ // // is_any_scalar: checks for either CPU and GPU scalars, i.e. 
is_cpu_scalar<>::value || is_scalar<>::value // //template //struct is_any_scalar //{ // enum { value = (is_scalar::value || is_cpu_scalar::value || is_flip_sign_scalar::value )}; //}; // /** \cond */ #define VIENNACL_MAKE_ANY_VECTOR_TRUE(type) template<> struct is_any_vector< type > { enum { value = 1 }; }; #define VIENNACL_MAKE_FOR_ALL_SCALARTYPE(type) \ VIENNACL_MAKE_ANY_VECTOR_TRUE(type)\ VIENNACL_MAKE_ANY_VECTOR_TRUE(type) VIENNACL_MAKE_FOR_ALL_SCALARTYPE(viennacl::vector) VIENNACL_MAKE_FOR_ALL_SCALARTYPE(viennacl::vector_range) VIENNACL_MAKE_FOR_ALL_SCALARTYPE(viennacl::vector_slice) VIENNACL_MAKE_FOR_ALL_SCALARTYPE(viennacl::unit_vector) VIENNACL_MAKE_FOR_ALL_SCALARTYPE(viennacl::zero_vector) VIENNACL_MAKE_FOR_ALL_SCALARTYPE(viennacl::one_vector) VIENNACL_MAKE_FOR_ALL_SCALARTYPE(viennacl::scalar_vector) #undef VIENNACL_MAKE_FOR_ALL_SCALARTYPE #undef VIENNACL_MAKE_ANY_VECTOR_TRUE /** \endcond */ /** \cond */ #define VIENNACL_MAKE_ANY_MATRIX_TRUE(TYPE)\ template<> struct is_any_dense_matrix< TYPE > { enum { value = 1 }; }; #define VIENNACL_MAKE_FOR_ALL_SCALARTYPE(TYPE) \ VIENNACL_MAKE_ANY_MATRIX_TRUE(TYPE)\ VIENNACL_MAKE_ANY_MATRIX_TRUE(TYPE) #define COMMA , #define VIENNACL_MAKE_FOR_ALL_SCALARTYPE_LAYOUT(TYPE) \ VIENNACL_MAKE_ANY_MATRIX_TRUE(TYPE)\ VIENNACL_MAKE_ANY_MATRIX_TRUE(TYPE)\ VIENNACL_MAKE_ANY_MATRIX_TRUE(TYPE)\ VIENNACL_MAKE_ANY_MATRIX_TRUE(TYPE) VIENNACL_MAKE_FOR_ALL_SCALARTYPE_LAYOUT(viennacl::matrix) // VIENNACL_MAKE_FOR_ALL_SCALARTYPE_LAYOUT(viennacl::matrix_range) // VIENNACL_MAKE_FOR_ALL_SCALARTYPE_LAYOUT(viennacl::matrix_slice) VIENNACL_MAKE_FOR_ALL_SCALARTYPE(viennacl::identity_matrix) VIENNACL_MAKE_FOR_ALL_SCALARTYPE(viennacl::zero_matrix) VIENNACL_MAKE_FOR_ALL_SCALARTYPE(viennacl::scalar_matrix) #undef VIENNACL_MAKE_FOR_ALL_SCALARTYPE_LAYOUT #undef VIENNACL_MAKE_FOR_ALL_SCALARTYPE #undef VIENNACL_MAKE_ANY_MATRIX_TRUE /** \endcond */ // // is_row_major // //template //struct is_row_major //{ // enum { value = false }; //}; /** \cond */ 
template struct is_row_major > { enum { value = true }; }; template <> struct is_row_major< viennacl::row_major > { enum { value = true }; }; template struct is_row_major > { enum { value = is_row_major::value }; }; /** \endcond */ // // is_circulant_matrix // //template //struct is_circulant_matrix //{ // enum { value = false }; //}; /** \cond */ template struct is_circulant_matrix > { enum { value = true }; }; template struct is_circulant_matrix > { enum { value = true }; }; /** \endcond */ // // is_hankel_matrix // //template //struct is_hankel_matrix //{ // enum { value = false }; //}; /** \cond */ template struct is_hankel_matrix > { enum { value = true }; }; template struct is_hankel_matrix > { enum { value = true }; }; /** \endcond */ // // is_toeplitz_matrix // //template //struct is_toeplitz_matrix //{ // enum { value = false }; //}; /** \cond */ template struct is_toeplitz_matrix > { enum { value = true }; }; template struct is_toeplitz_matrix > { enum { value = true }; }; /** \endcond */ // // is_vandermonde_matrix // //template //struct is_vandermonde_matrix //{ // enum { value = false }; //}; /** \cond */ template struct is_vandermonde_matrix > { enum { value = true }; }; template struct is_vandermonde_matrix > { enum { value = true }; }; /** \endcond */ // // is_compressed_matrix // /** \cond */ template struct is_compressed_matrix > { enum { value = true }; }; /** \endcond */ // // is_coordinate_matrix // /** \cond */ template struct is_coordinate_matrix > { enum { value = true }; }; /** \endcond */ // // is_ell_matrix // /** \cond */ template struct is_ell_matrix > { enum { value = true }; }; /** \endcond */ // // is_hyb_matrix // /** \cond */ template struct is_hyb_matrix > { enum { value = true }; }; /** \endcond */ // // is_any_sparse_matrix // //template //struct is_any_sparse_matrix //{ // enum { value = false }; //}; /** \cond */ template struct is_any_sparse_matrix > { enum { value = true }; }; template struct is_any_sparse_matrix > { enum { 
value = true }; }; template struct is_any_sparse_matrix > { enum { value = true }; }; template struct is_any_sparse_matrix > { enum { value = true }; }; template struct is_any_sparse_matrix > { enum { value = true }; }; template struct is_any_sparse_matrix { enum { value = is_any_sparse_matrix::value }; }; /** \endcond */ //////////////// Part 2: Operator predicates //////////////////// // // is_addition // /** @brief Helper metafunction for checking whether the provided type is viennacl::op_add (for addition) */ template struct is_addition { enum { value = false }; }; /** \cond */ template <> struct is_addition { enum { value = true }; }; /** \endcond */ // // is_subtraction // /** @brief Helper metafunction for checking whether the provided type is viennacl::op_sub (for subtraction) */ template struct is_subtraction { enum { value = false }; }; /** \cond */ template <> struct is_subtraction { enum { value = true }; }; /** \endcond */ // // is_product // /** @brief Helper metafunction for checking whether the provided type is viennacl::op_prod (for products/multiplication) */ template struct is_product { enum { value = false }; }; /** \cond */ template <> struct is_product { enum { value = true }; }; template <> struct is_product { enum { value = true }; }; template <> struct is_product > { enum { value = true }; }; /** \endcond */ // // is_division // /** @brief Helper metafunction for checking whether the provided type is viennacl::op_div (for division) */ template struct is_division { enum { value = false }; }; /** \cond */ template <> struct is_division { enum { value = true }; }; template <> struct is_division > { enum { value = true }; }; /** \endcond */ // is_primitive_type // /** @brief Helper class for checking whether a type is a primitive type. 
*/ template struct is_primitive_type{ enum {value = false}; }; /** \cond */ template<> struct is_primitive_type { enum { value = true }; }; template<> struct is_primitive_type { enum { value = true }; }; template<> struct is_primitive_type { enum { value = true }; }; template<> struct is_primitive_type { enum { value = true }; }; template<> struct is_primitive_type { enum { value = true }; }; template<> struct is_primitive_type { enum { value = true }; }; template<> struct is_primitive_type { enum { value = true }; }; template<> struct is_primitive_type { enum { value = true }; }; template<> struct is_primitive_type{ enum { value = true }; }; template<> struct is_primitive_type { enum { value = true }; }; /** \endcond */ #ifdef VIENNACL_WITH_OPENCL /** @brief Helper class for checking whether a particular type is a native OpenCL type. */ template struct is_cl_type{ enum { value = false }; }; /** \cond */ template<> struct is_cl_type { enum { value = true }; }; template<> struct is_cl_type{ enum { value = true }; }; template<> struct is_cl_type { enum { value = true }; }; template<> struct is_cl_type { enum { value = true }; }; template<> struct is_cl_type { enum { value = true }; }; template<> struct is_cl_type { enum { value = true }; }; template<> struct is_cl_type { enum { value = true }; }; template<> struct is_cl_type { enum { value = true }; }; template<> struct is_cl_type{ enum { value = true }; }; template<> struct is_cl_type { enum { value = true }; }; /** \endcond */ #endif } //namespace viennacl #endif ViennaCL-1.5.1-src/viennacl/meta/enable_if.hpp000644 001750 001750 00000002363 12267307531 021134 0ustar00rupprupp000000 000000 #ifndef VIENNACL_META_ENABLE_IF_HPP_ #define VIENNACL_META_ENABLE_IF_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/meta/enable_if.hpp @brief Simple enable-if variant that uses the SFINAE pattern */ namespace viennacl { /** @brief Simple enable-if variant that uses the SFINAE pattern */ template struct enable_if { typedef T type; }; /** \cond */ template struct enable_if {}; /** \endcond */ } //namespace viennacl #endif ViennaCL-1.5.1-src/viennacl/meta/result_of.hpp000644 001750 001750 00000040122 12267307531 021225 0ustar00rupprupp000000 000000 #ifndef VIENNACL_META_RESULT_OF_HPP_ #define VIENNACL_META_RESULT_OF_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/meta/result_of.hpp @brief A collection of compile time type deductions */ #include #include #include #include "viennacl/forwards.h" #ifdef VIENNACL_WITH_UBLAS #include #include #endif #ifdef VIENNACL_WITH_EIGEN #include #include #endif #ifdef VIENNACL_WITH_MTL4 #include #endif #ifdef VIENNACL_WITH_OPENCL #ifdef __APPLE__ #include #else #include "CL/cl.h" #endif #endif #include #include namespace viennacl { namespace result_of { // // Retrieve alignment from vector // /** @brief Retrieves the alignment from a vector. 
Deprecated - will be replaced by a pure runtime facility in the future. */ template struct alignment { typedef typename T::ERROR_ARGUMENT_PROVIDED_IS_NOT_A_VECTOR_OR_A_MATRIX error_type; enum { value = 1 }; }; /** \cond */ template struct alignment { enum { value = alignment::value }; }; template struct alignment< vector > { enum { value = ALIGNMENT }; }; template struct alignment< vector_range > { enum { value = alignment::value }; }; template struct alignment< vector_slice > { enum { value = alignment::value }; }; // support for a*x with scalar a and vector x template struct alignment< vector_expression > { enum { value = alignment::value }; }; // Matrices template struct alignment< matrix > { enum { value = ALIGNMENT }; }; template struct alignment< matrix_range > { enum { value = alignment::value }; }; template struct alignment< matrix_slice > { enum { value = alignment::value }; }; template struct alignment< matrix_expression > { enum { value = alignment::value }; }; /** \endcond */ // // Majority specifier for matrices (row_major, column_major) // /** @brief Returns the orientation functor tag (either row_major or column_major) of a matrix */ template struct orientation_functor { typedef typename T::ERROR_ARGUMENT_PROVIDED_IS_NOT_A_MATRIX type; }; /** \cond */ template struct orientation_functor { typedef typename orientation_functor::type type; }; template struct orientation_functor< matrix > { typedef F type; }; template struct orientation_functor< matrix_range > { typedef typename orientation_functor::type type; }; template struct orientation_functor< matrix_slice > { typedef typename orientation_functor::type type; }; template struct orientation_functor< matrix_base > { typedef F type; }; template struct orientation_functor< matrix_expression > { typedef typename orientation_functor::type type; }; /** \endcond */ // // Retrieve size_type // /** @brief Generic meta-function for retrieving the size_type associated with type T */ template struct size_type { 
typedef typename T::size_type type; }; /** \cond */ template struct size_type< vector_base > { typedef SizeType type; }; #ifdef VIENNACL_WITH_EIGEN template struct size_type< Eigen::Matrix > { typedef vcl_size_t type; }; template <> struct size_type { typedef vcl_size_t type; }; template <> struct size_type { typedef vcl_size_t type; }; template struct size_type > { typedef vcl_size_t type; }; #endif /** \endcond */ // // Retrieve value_type: // /** @brief Generic helper function for retrieving the value_type associated with type T */ template struct value_type { typedef typename T::value_type type; }; /** \cond */ #ifdef VIENNACL_WITH_EIGEN template <> struct value_type { typedef Eigen::MatrixXf::RealScalar type; }; template <> struct value_type { typedef Eigen::MatrixXd::RealScalar type; }; template struct value_type > { typedef ScalarType type; }; template <> struct value_type { typedef Eigen::VectorXf::RealScalar type; }; template <> struct value_type { typedef Eigen::VectorXd::RealScalar type; }; #endif /** \endcond */ // // Retrieve cpu value_type: // /** @brief Helper meta function for retrieving the main RAM-based value type. Particularly important to obtain T from viennacl::scalar in a generic way. 
*/ template struct cpu_value_type { typedef typename T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type; }; /** \cond */ template struct cpu_value_type { typedef typename cpu_value_type::type type; }; template <> struct cpu_value_type { typedef char type; }; template <> struct cpu_value_type { typedef unsigned char type; }; template <> struct cpu_value_type { typedef short type; }; template <> struct cpu_value_type { typedef unsigned short type; }; template <> struct cpu_value_type { typedef int type; }; template <> struct cpu_value_type { typedef unsigned int type; }; template <> struct cpu_value_type { typedef int type; }; template <> struct cpu_value_type { typedef unsigned long type; }; template <> struct cpu_value_type { typedef float type; }; template <> struct cpu_value_type { typedef double type; }; template struct cpu_value_type > { typedef T type; }; template struct cpu_value_type > { typedef T type; }; template struct cpu_value_type > { typedef T type; }; template struct cpu_value_type > { typedef T type; }; template struct cpu_value_type > { typedef typename cpu_value_type::type type; }; template struct cpu_value_type > { typedef typename cpu_value_type::type type; }; template struct cpu_value_type > { typedef typename cpu_value_type::type type; }; template struct cpu_value_type > { typedef typename cpu_value_type::type type; }; template struct cpu_value_type > { typedef T type; }; template struct cpu_value_type > { typedef T type; }; template struct cpu_value_type > { typedef T type; }; template struct cpu_value_type > { typedef typename cpu_value_type::type type; }; template struct cpu_value_type > { typedef typename cpu_value_type::type type; }; template struct cpu_value_type > { typedef typename cpu_value_type::type type; }; template struct cpu_value_type > { typedef typename cpu_value_type::type type; }; template struct cpu_value_type > { typedef typename cpu_value_type::type type; }; template struct cpu_value_type > { typedef typename 
cpu_value_type::type type; }; template struct cpu_value_type > { typedef typename cpu_value_type::type type; }; template struct cpu_value_type > { typedef typename cpu_value_type::type type; }; template struct cpu_value_type > { typedef typename cpu_value_type::type type; }; template struct cpu_value_type > { typedef typename cpu_value_type::type type; }; template struct cpu_value_type > { typedef typename cpu_value_type::type type; }; template struct cpu_value_type > { typedef typename cpu_value_type::type type; }; // // Deduce compatible vector type for a matrix type // template struct vector_for_matrix { typedef typename T::ERROR_CANNOT_DEDUCE_VECTOR_FOR_MATRIX_TYPE type; }; //ViennaCL template struct vector_for_matrix< viennacl::matrix > { typedef viennacl::vector type; }; template struct vector_for_matrix< viennacl::compressed_matrix > { typedef viennacl::vector type; }; template struct vector_for_matrix< viennacl::coordinate_matrix > { typedef viennacl::vector type; }; #ifdef VIENNACL_WITH_UBLAS //Boost: template struct vector_for_matrix< boost::numeric::ublas::matrix > { typedef boost::numeric::ublas::vector type; }; template struct vector_for_matrix< boost::numeric::ublas::compressed_matrix > { typedef boost::numeric::ublas::vector type; }; template struct vector_for_matrix< boost::numeric::ublas::coordinate_matrix > { typedef boost::numeric::ublas::vector type; }; #endif template struct reference_if_nonscalar { typedef T & type; }; #define VIENNACL_REFERENCE_IF_NONSCALAR_INT(TNAME) \ template <> struct reference_if_nonscalar { typedef TNAME type; }; \ template <> struct reference_if_nonscalar { typedef const TNAME type; }; \ template <> struct reference_if_nonscalar { typedef unsigned TNAME type; }; \ template <> struct reference_if_nonscalar { typedef const unsigned TNAME type; }; VIENNACL_REFERENCE_IF_NONSCALAR_INT(char) VIENNACL_REFERENCE_IF_NONSCALAR_INT(short) VIENNACL_REFERENCE_IF_NONSCALAR_INT(int) VIENNACL_REFERENCE_IF_NONSCALAR_INT(long) #undef 
VIENNACL_REFERENCE_IF_NONSCALAR_INT template <> struct reference_if_nonscalar { typedef float type; }; template <> struct reference_if_nonscalar { typedef const float type; }; template <> struct reference_if_nonscalar { typedef double type; }; template <> struct reference_if_nonscalar { typedef const double type; }; /** \endcond */ //OpenCL equivalent type /** @brief Metafunction for deducing the OpenCL type for a numeric type, e.g. float -> cl_float */ template struct cl_type { typedef T type; }; /** \cond */ #ifdef VIENNACL_WITH_OPENCL template<> struct cl_type{ typedef cl_float type; }; template<> struct cl_type{ typedef cl_double type; }; template<> struct cl_type{ typedef cl_int type; }; template<> struct cl_type{ typedef cl_uint type; }; template<> struct cl_type{ typedef cl_long type; }; template<> struct cl_type{ typedef cl_ulong type; }; template<> struct cl_type{ typedef cl_short type; }; template<> struct cl_type{ typedef cl_ushort type; }; template<> struct cl_type{ typedef cl_char type; }; template<> struct cl_type{ typedef cl_uchar type; }; #endif /** \endcond */ } //namespace result_of } //namespace viennacl #endif ViennaCL-1.5.1-src/viennacl/meta/tag_of.hpp000644 001750 001750 00000022330 12267307531 020463 0ustar00rupprupp000000 000000 #ifndef VIENNACL_META_TAGOF_HPP_ #define VIENNACL_META_TAGOF_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file tag_of.hpp @brief Dispatch facility for distinguishing between ublas, STL and ViennaCL types */ #include #include #include "viennacl/forwards.h" #ifdef VIENNACL_WITH_UBLAS #include #include #include #endif #ifdef VIENNACL_WITH_EIGEN #include #include #endif #ifdef VIENNACL_WITH_MTL4 #include #endif namespace viennacl { // ---------------------------------------------------- // TAGS // /** @brief A tag class for identifying 'unknown' types. */ struct tag_none {}; /** @brief A tag class for identifying types from MTL4. */ struct tag_mtl4 {}; /** @brief A tag class for identifying types from Eigen. */ struct tag_eigen {}; /** @brief A tag class for identifying types from uBLAS. */ struct tag_ublas {}; /** @brief A tag class for identifying types from the C++ STL. */ struct tag_stl {}; /** @brief A tag class for identifying types from ViennaCL. */ struct tag_viennacl {}; namespace traits { // ---------------------------------------------------- // GENERIC BASE // /** @brief Generic base for wrapping other linear algebra packages * * Maps types to tags, e.g. 
viennacl::vector to tag_viennacl, ublas::vector to tag_ublas * if the matrix type is unknown, tag_none is returned * * This is an internal function only, there is no need for a library user of ViennaCL to care about it any further * * @tparam T The type to be inspected */ template< typename T, typename Active = void > struct tag_of; /** \cond */ template < typename Sequence, typename Active > struct tag_of { typedef viennacl::tag_none type; }; #ifdef VIENNACL_WITH_MTL4 // ---------------------------------------------------- // MTL4 // template struct tag_of< mtl::dense_vector > { typedef viennacl::tag_mtl4 type; }; template struct tag_of< mtl::compressed2D > { typedef viennacl::tag_mtl4 type; }; template struct tag_of< mtl::dense2D > { typedef viennacl::tag_mtl4 type; }; #endif #ifdef VIENNACL_WITH_EIGEN // ---------------------------------------------------- // Eigen // template <> struct tag_of< Eigen::VectorXf > { typedef viennacl::tag_eigen type; }; template <> struct tag_of< Eigen::VectorXd > { typedef viennacl::tag_eigen type; }; template <> struct tag_of< Eigen::MatrixXf > { typedef viennacl::tag_eigen type; }; template <> struct tag_of< Eigen::MatrixXd > { typedef viennacl::tag_eigen type; }; template struct tag_of< Eigen::SparseMatrix > { typedef viennacl::tag_eigen type; }; #endif #ifdef VIENNACL_WITH_UBLAS // ---------------------------------------------------- // UBLAS // template< typename T > struct tag_of< boost::numeric::ublas::vector > { typedef viennacl::tag_ublas type; }; template< typename T > struct tag_of< boost::numeric::ublas::matrix > { typedef viennacl::tag_ublas type; }; template< typename T1, typename T2 > struct tag_of< boost::numeric::ublas::matrix_unary2 > { typedef viennacl::tag_ublas type; }; template< typename T1, typename T2 > struct tag_of< boost::numeric::ublas::compressed_matrix > { typedef viennacl::tag_ublas type; }; #endif // ---------------------------------------------------- // STL types // //vector template< typename T, 
typename A > struct tag_of< std::vector > { typedef viennacl::tag_stl type; }; //dense matrix template< typename T, typename A > struct tag_of< std::vector, A> > { typedef viennacl::tag_stl type; }; //sparse matrix (vector of maps) template< typename KEY, typename DATA, typename COMPARE, typename AMAP, typename AVEC> struct tag_of< std::vector, AVEC> > { typedef viennacl::tag_stl type; }; // ---------------------------------------------------- // VIENNACL // template< typename T, unsigned int alignment > struct tag_of< viennacl::vector > { typedef viennacl::tag_viennacl type; }; template< typename T, typename F, unsigned int alignment > struct tag_of< viennacl::matrix > { typedef viennacl::tag_viennacl type; }; template< typename T1, typename T2, typename OP > struct tag_of< viennacl::matrix_expression > { typedef viennacl::tag_viennacl type; }; template< typename T > struct tag_of< viennacl::matrix_range > { typedef viennacl::tag_viennacl type; }; template< typename T, unsigned int I> struct tag_of< viennacl::compressed_matrix > { typedef viennacl::tag_viennacl type; }; template< typename T, unsigned int I> struct tag_of< viennacl::coordinate_matrix > { typedef viennacl::tag_viennacl type; }; template< typename T, unsigned int I> struct tag_of< viennacl::ell_matrix > { typedef viennacl::tag_viennacl type; }; template< typename T, unsigned int I> struct tag_of< viennacl::hyb_matrix > { typedef viennacl::tag_viennacl type; }; template< typename T, unsigned int I> struct tag_of< viennacl::circulant_matrix > { typedef viennacl::tag_viennacl type; }; template< typename T, unsigned int I> struct tag_of< viennacl::hankel_matrix > { typedef viennacl::tag_viennacl type; }; template< typename T, unsigned int I> struct tag_of< viennacl::toeplitz_matrix > { typedef viennacl::tag_viennacl type; }; template< typename T, unsigned int I> struct tag_of< viennacl::vandermonde_matrix > { typedef viennacl::tag_viennacl type; }; /** \endcond */ // 
---------------------------------------------------- } // end namespace traits /** @brief Meta function which checks whether a tag is tag_mtl4 * * This is an internal function only, there is no need for a library user of ViennaCL to care about it any further */ template struct is_mtl4 { enum { value = false }; }; /** \cond */ template <> struct is_mtl4< viennacl::tag_mtl4 > { enum { value = true }; }; /** \endcond */ /** @brief Meta function which checks whether a tag is tag_eigen * * This is an internal function only, there is no need for a library user of ViennaCL to care about it any further */ template struct is_eigen { enum { value = false }; }; /** \cond */ template <> struct is_eigen< viennacl::tag_eigen > { enum { value = true }; }; /** \endcond */ /** @brief Meta function which checks whether a tag is tag_ublas * * This is an internal function only, there is no need for a library user of ViennaCL to care about it any further */ template struct is_ublas { enum { value = false }; }; /** \cond */ template <> struct is_ublas< viennacl::tag_ublas > { enum { value = true }; }; /** \endcond */ /** @brief Meta function which checks whether a tag is tag_ublas * * This is an internal function only, there is no need for a library user of ViennaCL to care about it any further */ template struct is_stl { enum { value = false }; }; /** \cond */ template <> struct is_stl< viennacl::tag_stl > { enum { value = true }; }; /** \endcond */ /** @brief Meta function which checks whether a tag is tag_viennacl * * This is an internal function only, there is no need for a library user of ViennaCL to care about it any further */ template struct is_viennacl { enum { value = false }; }; /** \cond */ template <> struct is_viennacl< viennacl::tag_viennacl > { enum { value = true }; }; /** \endcond */ } // end namespace viennacl #endif ViennaCL-1.5.1-src/viennacl/compressed_compressed_matrix.hpp000644 001750 001750 00000066642 12267307531 024270 0ustar00rupprupp000000 000000 #ifndef 
VIENNACL_COMPRESSED_compressed_compressed_matrix_HPP_ #define VIENNACL_COMPRESSED_compressed_compressed_matrix_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/compressed_compressed_matrix.hpp @brief Implementation of the compressed_compressed_matrix class (CSR format with a relatively small number of nonzero rows) */ #include #include #include #include "viennacl/forwards.h" #include "viennacl/vector.hpp" #include "viennacl/linalg/sparse_matrix_operations.hpp" #include "viennacl/tools/tools.hpp" #include "viennacl/tools/entry_proxy.hpp" namespace viennacl { namespace detail { template void copy_impl(const CPU_MATRIX & cpu_matrix, compressed_compressed_matrix & gpu_matrix, vcl_size_t nonzero_rows, vcl_size_t nonzeros) { assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); viennacl::backend::typesafe_host_array row_buffer(gpu_matrix.handle1(), nonzero_rows + 1); viennacl::backend::typesafe_host_array row_indices(gpu_matrix.handle3(), nonzero_rows); viennacl::backend::typesafe_host_array col_buffer(gpu_matrix.handle2(), nonzeros); std::vector elements(nonzeros); vcl_size_t row_index = 0; vcl_size_t data_index = 0; for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) { 
bool row_empty = true; for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) { SCALARTYPE entry = *col_it; if (entry != SCALARTYPE(0)) { if (row_empty) { assert(row_index < nonzero_rows && bool("Provided count of nonzero rows exceeded!")); row_empty = false; row_buffer.set(row_index, data_index); row_indices.set(row_index, col_it.index1()); ++row_index; } col_buffer.set(data_index, col_it.index2()); elements[data_index] = entry; ++data_index; } } } row_buffer.set(row_index, data_index); gpu_matrix.set(row_buffer.get(), row_indices.get(), col_buffer.get(), &elements[0], cpu_matrix.size1(), cpu_matrix.size2(), nonzero_rows, nonzeros); } } //provide copy-operation: /** @brief Copies a sparse matrix from the host to the OpenCL device (either GPU or multi-core CPU) * * There are some type requirements on the CPU_MATRIX type (fulfilled by e.g. boost::numeric::ublas): * - .size1() returns the number of rows * - .size2() returns the number of columns * - const_iterator1 is a type definition for an iterator along increasing row indices * - const_iterator2 is a type definition for an iterator along increasing columns indices * - The const_iterator1 type provides an iterator of type const_iterator2 via members .begin() and .end() that iterates along column indices in the current row. * - The types const_iterator1 and const_iterator2 provide members functions .index1() and .index2() that return the current row and column indices respectively. * - Dereferenciation of an object of type const_iterator2 returns the entry. * * @param cpu_matrix A sparse matrix on the host. 
* @param gpu_matrix A compressed_compressed_matrix from ViennaCL */ template void copy(const CPU_MATRIX & cpu_matrix, compressed_compressed_matrix & gpu_matrix ) { //std::cout << "copy for (" << cpu_matrix.size1() << ", " << cpu_matrix.size2() << ", " << cpu_matrix.nnz() << ")" << std::endl; if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 ) { //determine nonzero rows and total nonzeros: vcl_size_t num_entries = 0; vcl_size_t nonzero_rows = 0; for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) { bool row_empty = true; for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) { if (*col_it != SCALARTYPE(0)) { ++num_entries; if (row_empty) { row_empty = false; ++nonzero_rows; } } } } if (num_entries == 0) //we copy an empty matrix num_entries = 1; //set up matrix entries: detail::copy_impl(cpu_matrix, gpu_matrix, nonzero_rows, num_entries); } } //adapted for std::vector< std::map < > > argument: /** @brief Copies a sparse square matrix in the std::vector< std::map < > > format to an OpenCL device. Use viennacl::tools::sparse_matrix_adapter for non-square matrices. * * @param cpu_matrix A sparse square matrix on the host using STL types * @param gpu_matrix A compressed_compressed_matrix from ViennaCL */ template void copy(const std::vector< std::map > & cpu_matrix, compressed_compressed_matrix & gpu_matrix ) { vcl_size_t nonzero_rows = 0; vcl_size_t nonzeros = 0; vcl_size_t max_col = 0; for (vcl_size_t i=0; i 0) ++nonzero_rows; nonzeros += cpu_matrix[i].size(); if (cpu_matrix[i].size() > 0) max_col = std::max(max_col, (cpu_matrix[i].rbegin())->first); } viennacl::detail::copy_impl(tools::const_sparse_matrix_adapter(cpu_matrix, cpu_matrix.size(), max_col + 1), gpu_matrix, nonzero_rows, nonzeros); } // // gpu to cpu: // /** @brief Copies a sparse matrix from the OpenCL device (either GPU or multi-core CPU) to the host. 
* * There are two type requirements on the CPU_MATRIX type (fulfilled by e.g. boost::numeric::ublas): * - resize(rows, cols) A resize function to bring the matrix into the correct size * - operator(i,j) Write new entries via the parenthesis operator * * @param gpu_matrix A compressed_compressed_matrix from ViennaCL * @param cpu_matrix A sparse matrix on the host. */ template void copy(const compressed_compressed_matrix & gpu_matrix, CPU_MATRIX & cpu_matrix ) { assert( (cpu_matrix.size1() == gpu_matrix.size1()) && bool("Size mismatch") ); assert( (cpu_matrix.size2() == gpu_matrix.size2()) && bool("Size mismatch") ); if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 ) { //get raw data from memory: viennacl::backend::typesafe_host_array row_buffer(gpu_matrix.handle1(), gpu_matrix.nnz1() + 1); viennacl::backend::typesafe_host_array row_indices(gpu_matrix.handle1(), gpu_matrix.nnz1()); viennacl::backend::typesafe_host_array col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz()); std::vector elements(gpu_matrix.nnz()); //std::cout << "GPU->CPU, nonzeros: " << gpu_matrix.nnz() << std::endl; viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get()); viennacl::backend::memory_read(gpu_matrix.handle3(), 0, row_indices.raw_size(), row_indices.get()); viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get()); viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE)* gpu_matrix.nnz(), &(elements[0])); //fill the cpu_matrix: vcl_size_t data_index = 0; for (vcl_size_t i = 1; i < row_buffer.size(); ++i) { while (data_index < row_buffer[i]) { if (col_buffer[data_index] >= gpu_matrix.size2()) { std::cerr << "ViennaCL encountered invalid data at colbuffer[" << data_index << "]: " << col_buffer[data_index] << std::endl; return; } if (elements[data_index] != static_cast(0.0)) cpu_matrix(row_indices[i-1], col_buffer[data_index]) = elements[data_index]; ++data_index; } } } } /** @brief Copies a 
sparse matrix from an OpenCL device to the host. The host type is the std::vector< std::map < > > format . * * @param gpu_matrix A compressed_compressed_matrix from ViennaCL * @param cpu_matrix A sparse matrix on the host. */ template void copy(const compressed_compressed_matrix & gpu_matrix, std::vector< std::map > & cpu_matrix) { tools::sparse_matrix_adapter temp(cpu_matrix, cpu_matrix.size(), cpu_matrix.size()); copy(gpu_matrix, temp); } //////////////////////// compressed_compressed_matrix ////////////////////////// /** @brief A sparse square matrix in compressed sparse rows format optimized for the case that only a few rows carry nonzero entries. * * The difference to the 'standard' CSR format is that there is an additional array 'row_indices' so that the i-th set of indices in the CSR-layout refers to row_indices[i]. * * @tparam SCALARTYPE The floating point type (either float or double, checked at compile time) * @tparam ALIGNMENT The internal memory size for the entries in each row is given by (size()/ALIGNMENT + 1) * ALIGNMENT. ALIGNMENT must be a power of two. Best values or usually 4, 8 or 16, higher values are usually a waste of memory. */ template class compressed_compressed_matrix { public: typedef viennacl::backend::mem_handle handle_type; typedef scalar::ResultType> value_type; typedef vcl_size_t size_type; /** @brief Default construction of a compressed matrix. No memory is allocated */ compressed_compressed_matrix() : rows_(0), cols_(0), nonzero_rows_(0), nonzeros_(0) {} /** @brief Construction of a compressed matrix with the supplied number of rows and columns. If the number of nonzeros is positive, memory is allocated * * @param rows Number of rows * @param cols Number of columns * @param nonzero_rows Optional number of nonzero rows for memory preallocation * @param nonzeros Optional number of nonzeros for memory preallocation * @param ctx Context in which to create the matrix. 
Uses the default context if omitted */ explicit compressed_compressed_matrix(vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzero_rows = 0, vcl_size_t nonzeros = 0, viennacl::context ctx = viennacl::context()) : rows_(rows), cols_(cols), nonzero_rows_(nonzero_rows), nonzeros_(nonzeros) { row_buffer_.switch_active_handle_id(ctx.memory_type()); row_indices_.switch_active_handle_id(ctx.memory_type()); col_buffer_.switch_active_handle_id(ctx.memory_type()); elements_.switch_active_handle_id(ctx.memory_type()); #ifdef VIENNACL_WITH_OPENCL if (ctx.memory_type() == OPENCL_MEMORY) { row_buffer_.opencl_handle().context(ctx.opencl_context()); row_indices_.opencl_handle().context(ctx.opencl_context()); col_buffer_.opencl_handle().context(ctx.opencl_context()); elements_.opencl_handle().context(ctx.opencl_context()); } #endif if (rows > 0) { viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array().element_size() * (rows + 1), ctx); } if (nonzeros > 0) { viennacl::backend::memory_create(col_buffer_, viennacl::backend::typesafe_host_array().element_size() * nonzeros, ctx); viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE) * nonzeros, ctx); } } /** @brief Construction of a compressed matrix with the supplied number of rows and columns. 
If the number of nonzeros is positive, memory is allocated * * @param rows Number of rows * @param cols Number of columns * @param ctx Context in which to create the matrix */ explicit compressed_compressed_matrix(vcl_size_t rows, vcl_size_t cols, viennacl::context ctx) : rows_(rows), cols_(cols), nonzeros_(0) { row_buffer_.switch_active_handle_id(ctx.memory_type()); col_buffer_.switch_active_handle_id(ctx.memory_type()); elements_.switch_active_handle_id(ctx.memory_type()); #ifdef VIENNACL_WITH_OPENCL if (ctx.memory_type() == OPENCL_MEMORY) { row_buffer_.opencl_handle().context(ctx.opencl_context()); col_buffer_.opencl_handle().context(ctx.opencl_context()); elements_.opencl_handle().context(ctx.opencl_context()); } #endif if (rows > 0) { viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array().element_size() * (rows + 1), ctx); } } explicit compressed_compressed_matrix(viennacl::context ctx) : rows_(0), cols_(0), nonzero_rows_(0), nonzeros_(0) { row_buffer_.switch_active_handle_id(ctx.memory_type()); row_indices_.switch_active_handle_id(ctx.memory_type()); col_buffer_.switch_active_handle_id(ctx.memory_type()); elements_.switch_active_handle_id(ctx.memory_type()); #ifdef VIENNACL_WITH_OPENCL if (ctx.memory_type() == OPENCL_MEMORY) { row_buffer_.opencl_handle().context(ctx.opencl_context()); row_indices_.opencl_handle().context(ctx.opencl_context()); col_buffer_.opencl_handle().context(ctx.opencl_context()); elements_.opencl_handle().context(ctx.opencl_context()); } #endif } #ifdef VIENNACL_WITH_OPENCL explicit compressed_compressed_matrix(cl_mem mem_row_buffer, cl_mem mem_row_indices, cl_mem mem_col_buffer, cl_mem mem_elements, vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzero_rows, vcl_size_t nonzeros) : rows_(rows), cols_(cols), nonzero_rows_(nonzero_rows), nonzeros_(nonzeros) { row_buffer_.switch_active_handle_id(viennacl::OPENCL_MEMORY); row_buffer_.opencl_handle() = mem_row_buffer; row_buffer_.opencl_handle().inc(); 
//prevents that the user-provided memory is deleted once the matrix object is destroyed. row_buffer_.raw_size(sizeof(cl_uint) * (nonzero_rows + 1)); row_indices_.switch_active_handle_id(viennacl::OPENCL_MEMORY); row_indices_.opencl_handle() = mem_row_indices; row_indices_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed. row_indices_.raw_size(sizeof(cl_uint) * nonzero_rows); col_buffer_.switch_active_handle_id(viennacl::OPENCL_MEMORY); col_buffer_.opencl_handle() = mem_col_buffer; col_buffer_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed. col_buffer_.raw_size(sizeof(cl_uint) * nonzeros); elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY); elements_.opencl_handle() = mem_elements; elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed. elements_.raw_size(sizeof(SCALARTYPE) * nonzeros); } #endif /** @brief Assignment a compressed matrix from possibly another memory domain. */ compressed_compressed_matrix & operator=(compressed_compressed_matrix const & other) { assert( (rows_ == 0 || rows_ == other.size1()) && bool("Size mismatch") ); assert( (cols_ == 0 || cols_ == other.size2()) && bool("Size mismatch") ); rows_ = other.size1(); cols_ = other.size2(); nonzero_rows_ = other.nnz1(); nonzeros_ = other.nnz(); viennacl::backend::typesafe_memory_copy(other.row_buffer_, row_buffer_); viennacl::backend::typesafe_memory_copy(other.row_indices_, row_indices_); viennacl::backend::typesafe_memory_copy(other.col_buffer_, col_buffer_); viennacl::backend::typesafe_memory_copy(other.elements_, elements_); return *this; } /** @brief Sets the row, column and value arrays of the compressed matrix * * @param row_jumper Pointer to an array holding the indices of the first element of each row (starting with zero). E.g. row_jumper[10] returns the index of the first entry of the 11th row. 
The array length is 'cols + 1' * @param row_indices Array holding the indices of the nonzero rows * @param col_buffer Pointer to an array holding the column index of each entry. The array length is 'nonzeros' * @param elements Pointer to an array holding the entries of the sparse matrix. The array length is 'elements' * @param rows Number of rows of the sparse matrix * @param cols Number of columns of the sparse matrix * @param nonzero_rows Number of nonzero rows * @param nonzeros Total number of nonzero entries */ void set(const void * row_jumper, const void * row_indices, const void * col_buffer, const SCALARTYPE * elements, vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzero_rows, vcl_size_t nonzeros) { assert( (rows > 0) && bool("Error in compressed_compressed_matrix::set(): Number of rows must be larger than zero!")); assert( (cols > 0) && bool("Error in compressed_compressed_matrix::set(): Number of columns must be larger than zero!")); assert( (nonzero_rows > 0) && bool("Error in compressed_compressed_matrix::set(): Number of nonzero rows must be larger than zero!")); assert( (nonzeros > 0) && bool("Error in compressed_compressed_matrix::set(): Number of nonzeros must be larger than zero!")); //std::cout << "Setting memory: " << cols + 1 << ", " << nonzeros << std::endl; viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array(row_buffer_).element_size() * (rows + 1), viennacl::traits::context(row_buffer_), row_jumper); viennacl::backend::memory_create(row_indices_, viennacl::backend::typesafe_host_array(row_indices_).element_size() * (rows + 1), viennacl::traits::context(row_indices_), row_indices); viennacl::backend::memory_create(col_buffer_, viennacl::backend::typesafe_host_array(col_buffer_).element_size() * nonzeros, viennacl::traits::context(col_buffer_), col_buffer); viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE) * nonzeros, viennacl::traits::context(elements_), elements); nonzeros_ = nonzeros; 
nonzero_rows_ = nonzero_rows; rows_ = rows; cols_ = cols; } /** @brief Returns the number of rows */ const vcl_size_t & size1() const { return rows_; } /** @brief Returns the number of columns */ const vcl_size_t & size2() const { return cols_; } /** @brief Returns the number of nonzero entries */ const vcl_size_t & nnz1() const { return nonzero_rows_; } /** @brief Returns the number of nonzero entries */ const vcl_size_t & nnz() const { return nonzeros_; } /** @brief Returns the OpenCL handle to the row index array */ const handle_type & handle1() const { return row_buffer_; } /** @brief Returns the OpenCL handle to the column index array */ const handle_type & handle2() const { return col_buffer_; } /** @brief Returns the OpenCL handle to the row index array */ const handle_type & handle3() const { return row_indices_; } /** @brief Returns the OpenCL handle to the matrix entry array */ const handle_type & handle() const { return elements_; } /** @brief Returns the OpenCL handle to the row index array */ handle_type & handle1() { return row_buffer_; } /** @brief Returns the OpenCL handle to the column index array */ handle_type & handle2() { return col_buffer_; } /** @brief Returns the OpenCL handle to the row index array */ handle_type & handle3() { return row_indices_; } /** @brief Returns the OpenCL handle to the matrix entry array */ handle_type & handle() { return elements_; } void switch_memory_context(viennacl::context new_ctx) { viennacl::backend::switch_memory_context(row_buffer_, new_ctx); viennacl::backend::switch_memory_context(row_indices_, new_ctx); viennacl::backend::switch_memory_context(col_buffer_, new_ctx); viennacl::backend::switch_memory_context(elements_, new_ctx); } viennacl::memory_types memory_context() const { return row_buffer_.get_active_handle_id(); } private: vcl_size_t rows_; vcl_size_t cols_; vcl_size_t nonzero_rows_; vcl_size_t nonzeros_; handle_type row_buffer_; handle_type row_indices_; handle_type col_buffer_; handle_type 
elements_; }; // // Specify available operations: // /** \cond */ namespace linalg { namespace detail { // x = A * y template struct op_executor, op_assign, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { // check for the special case x = A * x if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs = temp; } else viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs); } }; template struct op_executor, op_inplace_add, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs += temp; } }; template struct op_executor, op_inplace_sub, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs -= temp; } }; // x = A * vec_op template struct op_executor, op_assign, vector_expression, const vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs()); viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs); } }; // x = A * vec_op template struct op_executor, op_inplace_add, vector_expression, vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs(), viennacl::traits::context(rhs)); viennacl::vector temp_result(lhs); viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); lhs += temp_result; } }; // x = A * vec_op template struct op_executor, op_inplace_sub, vector_expression, const vector_expression, 
op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs(), viennacl::traits::context(rhs)); viennacl::vector temp_result(lhs); viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); lhs -= temp_result; } }; } // namespace detail } // namespace linalg /** \endcond */ } #endif ViennaCL-1.5.1-src/viennacl/hyb_matrix.hpp000644 001750 001750 00000036607 12267307531 020460 0ustar00rupprupp000000 000000 #ifndef VIENNACL_HYB_MATRIX_HPP_ #define VIENNACL_HYB_MATRIX_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/hyb_matrix.hpp @brief Implementation of the hyb_matrix class Contributed by Volodymyr Kysenko. */ #include "viennacl/forwards.h" #include "viennacl/vector.hpp" #include "viennacl/tools/tools.hpp" #include "viennacl/linalg/sparse_matrix_operations.hpp" namespace viennacl { /** @brief Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros. 
*/ template class hyb_matrix { public: typedef viennacl::backend::mem_handle handle_type; typedef scalar::ResultType> value_type; hyb_matrix() : csr_threshold_(SCALARTYPE(0.8)), rows_(0), cols_(0) {} hyb_matrix(viennacl::context ctx) : csr_threshold_(SCALARTYPE(0.8)), rows_(0), cols_(0) { ell_coords_.switch_active_handle_id(ctx.memory_type()); ell_elements_.switch_active_handle_id(ctx.memory_type()); csr_rows_.switch_active_handle_id(ctx.memory_type()); csr_cols_.switch_active_handle_id(ctx.memory_type()); csr_elements_.switch_active_handle_id(ctx.memory_type()); #ifdef VIENNACL_WITH_OPENCL if (ctx.memory_type() == OPENCL_MEMORY) { ell_coords_.opencl_handle().context(ctx.opencl_context()); ell_elements_.opencl_handle().context(ctx.opencl_context()); csr_rows_.opencl_handle().context(ctx.opencl_context()); csr_cols_.opencl_handle().context(ctx.opencl_context()); csr_elements_.opencl_handle().context(ctx.opencl_context()); } #endif } SCALARTYPE csr_threshold() const { return csr_threshold_; } void csr_threshold(SCALARTYPE thr) { csr_threshold_ = thr; } vcl_size_t internal_size1() const { return viennacl::tools::align_to_multiple(rows_, ALIGNMENT); } vcl_size_t internal_size2() const { return viennacl::tools::align_to_multiple(cols_, ALIGNMENT); } vcl_size_t size1() const { return rows_; } vcl_size_t size2() const { return cols_; } vcl_size_t internal_ellnnz() const {return viennacl::tools::align_to_multiple(ellnnz_, ALIGNMENT); } vcl_size_t ell_nnz() const { return ellnnz_; } vcl_size_t csr_nnz() const { return csrnnz_; } const handle_type & handle() const { return ell_elements_; } const handle_type & handle2() const { return ell_coords_; } const handle_type & handle3() const { return csr_rows_; } const handle_type & handle4() const { return csr_cols_; } const handle_type & handle5() const { return csr_elements_; } public: #if defined(_MSC_VER) && _MSC_VER < 1500 //Visual Studio 2005 needs special treatment template friend void copy(const CPU_MATRIX & cpu_matrix, 
hyb_matrix & gpu_matrix ); #else template friend void copy(const CPU_MATRIX & cpu_matrix, hyb_matrix & gpu_matrix ); #endif private: SCALARTYPE csr_threshold_; vcl_size_t rows_; vcl_size_t cols_; vcl_size_t ellnnz_; vcl_size_t csrnnz_; handle_type ell_coords_; // ell coords handle_type ell_elements_; // ell elements handle_type csr_rows_; handle_type csr_cols_; handle_type csr_elements_; }; template void copy(const CPU_MATRIX& cpu_matrix, hyb_matrix& gpu_matrix ) { assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); if(cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0) { //determine max capacity for row vcl_size_t max_entries_per_row = 0; std::vector hist_entries(cpu_matrix.size1() + 1, 0); for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) { vcl_size_t num_entries = 0; for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) { ++num_entries; } hist_entries[num_entries] += 1; max_entries_per_row = std::max(max_entries_per_row, num_entries); } vcl_size_t sum = 0; for(vcl_size_t ind = 0; ind <= max_entries_per_row; ind++) { sum += hist_entries[ind]; if(sum >= gpu_matrix.csr_threshold() * cpu_matrix.size1()) { max_entries_per_row = ind; break; } } //setup GPU matrix gpu_matrix.ellnnz_ = max_entries_per_row; gpu_matrix.rows_ = cpu_matrix.size1(); gpu_matrix.cols_ = cpu_matrix.size2(); vcl_size_t nnz = gpu_matrix.internal_size1() * gpu_matrix.internal_ellnnz(); viennacl::backend::typesafe_host_array ell_coords(gpu_matrix.ell_coords_, nnz); viennacl::backend::typesafe_host_array csr_rows(gpu_matrix.csr_rows_, cpu_matrix.size1() + 1); std::vector csr_cols; std::vector ell_elements(nnz); std::vector csr_elements; vcl_size_t csr_index = 0; for (typename CPU_MATRIX::const_iterator1 
row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) { vcl_size_t data_index = 0; csr_rows.set(row_it.index1(), csr_index); for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) { if(data_index < max_entries_per_row) { ell_coords.set(gpu_matrix.internal_size1() * data_index + col_it.index1(), col_it.index2()); ell_elements[gpu_matrix.internal_size1() * data_index + col_it.index1()] = *col_it; } else { csr_cols.push_back(static_cast(col_it.index2())); csr_elements.push_back(*col_it); csr_index++; } data_index++; } } if(csr_cols.empty()) { csr_cols.push_back(0); csr_elements.push_back(0); } csr_rows.set(csr_rows.size() - 1, csr_index); gpu_matrix.csrnnz_ = csr_cols.size(); viennacl::backend::typesafe_host_array csr_cols_for_gpu(gpu_matrix.csr_cols_, csr_cols.size()); for (vcl_size_t i=0; i void copy(const hyb_matrix& gpu_matrix, CPU_MATRIX& cpu_matrix) { assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); if(gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0) { std::vector ell_elements(gpu_matrix.internal_size1() * gpu_matrix.internal_ellnnz()); viennacl::backend::typesafe_host_array ell_coords(gpu_matrix.handle2(), gpu_matrix.internal_size1() * gpu_matrix.internal_ellnnz()); std::vector csr_elements(gpu_matrix.csr_nnz()); viennacl::backend::typesafe_host_array csr_rows(gpu_matrix.handle3(), gpu_matrix.size1() + 1); viennacl::backend::typesafe_host_array csr_cols(gpu_matrix.handle4(), gpu_matrix.csr_nnz()); viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE) * ell_elements.size(), &(ell_elements[0])); viennacl::backend::memory_read(gpu_matrix.handle2(), 0, ell_coords.raw_size(), ell_coords.get()); viennacl::backend::memory_read(gpu_matrix.handle3(), 0, csr_rows.raw_size(), csr_rows.get()); viennacl::backend::memory_read(gpu_matrix.handle4(), 0, 
csr_cols.raw_size(), csr_cols.get()); viennacl::backend::memory_read(gpu_matrix.handle5(), 0, sizeof(SCALARTYPE) * csr_elements.size(), &(csr_elements[0])); for(vcl_size_t row = 0; row < gpu_matrix.size1(); row++) { for(vcl_size_t ind = 0; ind < gpu_matrix.internal_ellnnz(); ind++) { vcl_size_t offset = gpu_matrix.internal_size1() * ind + row; if(ell_elements[offset] == static_cast(0.0)) { continue; } if(ell_coords[offset] >= gpu_matrix.size2()) { std::cerr << "ViennaCL encountered invalid data " << offset << " " << ind << " " << row << " " << ell_coords[offset] << " " << gpu_matrix.size2() << std::endl; return; } cpu_matrix(row, ell_coords[offset]) = ell_elements[offset]; } for(vcl_size_t ind = csr_rows[row]; ind < csr_rows[row+1]; ind++) { if(csr_elements[ind] == static_cast(0.0)) { continue; } if(csr_cols[ind] >= gpu_matrix.size2()) { std::cerr << "ViennaCL encountered invalid data " << std::endl; return; } cpu_matrix(row, csr_cols[ind]) = csr_elements[ind]; } } } } // // Specify available operations: // /** \cond */ namespace linalg { namespace detail { // x = A * y template struct op_executor, op_assign, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { // check for the special case x = A * x if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs = temp; } else viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs); } }; template struct op_executor, op_inplace_add, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs += temp; } }; template struct op_executor, op_inplace_sub, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, 
vector_expression, const vector_base, op_prod> const & rhs) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs -= temp; } }; // x = A * vec_op template struct op_executor, op_assign, vector_expression, const vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs(), viennacl::traits::context(rhs)); viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs); } }; // x = A * vec_op template struct op_executor, op_inplace_add, vector_expression, const vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs(), viennacl::traits::context(rhs)); viennacl::vector temp_result(lhs); viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); lhs += temp_result; } }; // x = A * vec_op template struct op_executor, op_inplace_sub, vector_expression, const vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs(), viennacl::traits::context(rhs)); viennacl::vector temp_result(lhs); viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); lhs -= temp_result; } }; } // namespace detail } // namespace linalg /** \endcond */ } #endif ViennaCL-1.5.1-src/viennacl/io/000755 001750 001750 00000000000 12267307531 016174 5ustar00rupprupp000000 000000 ViennaCL-1.5.1-src/viennacl/io/matrix_market.hpp000644 001750 001750 00000035032 12267307531 021557 0ustar00rupprupp000000 000000 #ifndef VIENNACL_IO_MATRIX_MARKET_HPP #define VIENNACL_IO_MATRIX_MARKET_HPP /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file matrix_market.hpp @brief A reader and writer for the matrix market format is implemented here */ #include #include #include #include #include #include #include #include #include "viennacl/tools/adapter.hpp" #include "viennacl/traits/size.hpp" #include "viennacl/traits/fill.hpp" namespace viennacl { namespace io { //helper namespace detail { inline void trim(char * buffer, long max_size) { //trim at beginning of string long start = 0; for (long i=0; i (std::tolower)); return s; } } //namespace ///////// reader //////////// /** @brief Reads a sparse or dense matrix from a file (MatrixMarket format) * * @param mat The matrix that is to be read * @param file Filename from which the matrix should be read * @param index_base The index base, typically 1 * @tparam MatrixType A generic matrix type. Type requirements: size1() returns number of rows, size2() returns number columns, operator() writes array entries, resize() allows resizing the matrix. 
* @return Returns nonzero if file is read correctly */ template long read_matrix_market_file_impl(MatrixType & mat, const char * file, long index_base) { typedef typename viennacl::result_of::cpu_value_type::type>::type ScalarType; //std::cout << "Reading matrix market file" << std::endl; char buffer[1025]; std::ifstream reader(file); std::string token; long linenum = 0; bool symmetric = false; bool dense_format = false; bool is_header = true; long cur_row = 0; long cur_col = 0; long valid_entries = 0; long nnz = 0; if (!reader){ std::cerr << "ViennaCL: Matrix Market Reader: Cannot open file " << file << std::endl; return EXIT_FAILURE; } while (reader.good()) { // get a non-empty line do { reader.getline(buffer, 1024); ++linenum; detail::trim(buffer, 1024); } while (reader.good() && buffer[0] == 0); if (buffer[0] == '%') { if (buffer[1] == '%') { //parse header: std::stringstream line(std::string(buffer + 2)); line >> token; if (detail::tolower(token) != "matrixmarket") { std::cerr << "Error in file " << file << " at line " << linenum << " in file " << file << ": Expected 'MatrixMarket', got '" << token << "'" << std::endl; return 0; } line >> token; if (detail::tolower(token) != "matrix") { std::cerr << "Error in file " << file << " at line " << linenum << " in file " << file << ": Expected 'matrix', got '" << token << "'" << std::endl; return 0; } line >> token; if (detail::tolower(token) != "coordinate") { if (detail::tolower(token) == "array") { dense_format = true; std::cerr << "Error in file " << file << " at line " << linenum << " in file " << file << ": 'array' type is not supported yet!" 
<< std::endl; return 0; } else { std::cerr << "Error in file " << file << " at line " << linenum << " in file " << file << ": Expected 'array' or 'coordinate', got '" << token << "'" << std::endl; return 0; } } line >> token; if (detail::tolower(token) != "real") { std::cerr << "Error in file " << file << ": The MatrixMarket reader provided with ViennaCL supports only real valued floating point arithmetic." << std::endl; return 0; } line >> token; if (detail::tolower(token) == "general"){ } else if (detail::tolower(token) == "symmetric"){ symmetric = true; } else { std::cerr << "Error in file " << file << ": The MatrixMarket reader provided with ViennaCL supports only general or symmetric matrices." << std::endl; return 0; } } } else { std::stringstream line(std::stringstream::in | std::stringstream::out); line << std::string(buffer); if (is_header) { //read header line long rows; long cols; if (line.good()) line >> rows; else { std::cerr << "Error in file " << file << ": Could not get matrix dimensions (rows) in line " << linenum << std::endl; return 0; } if (line.good()) line >> cols; else { std::cerr << "Error in file " << file << ": Could not get matrix dimensions (columns) in line " << linenum << std::endl; return 0; } if (!dense_format) { if (line.good()) line >> nnz; else { std::cerr << "Error in file " << file << ": Could not get matrix dimensions (columns) in line " << linenum << std::endl; return 0; } } if (rows > 0 && cols > 0) viennacl::traits::resize(mat, rows, cols); is_header = false; } else { //read data if (dense_format) { ScalarType value; line >> value; viennacl::traits::fill(mat, cur_row, cur_col, value); if (++cur_row == static_cast(viennacl::traits::size1(mat))) { //next column ++cur_col; cur_row = 0; } } else //sparse format { long row; long col; ScalarType value; //parse data: if (line.good()) line >> row; else { std::cerr << "Error in file " << file << ": Parse error for matrix entry in line " << linenum << std::endl; return 0; } if 
(line.good()) line >> col; else { std::cerr << "Error in file " << file << ": Parse error for matrix entry in line " << linenum << std::endl; return 0; } //take index_base base into account: row -= index_base; col -= index_base; if (line.good()) line >> value; else { std::cerr << "Error in file " << file << ": Parse error for matrix entry in line " << linenum << std::endl; return 0; } if (row >= static_cast(viennacl::traits::size1(mat)) || row < 0) { std::cerr << "Error in file " << file << " at line " << linenum << ": Row index out of bounds: " << row << " (matrix dim: " << viennacl::traits::size1(mat) << " x " << viennacl::traits::size2(mat) << ")" << std::endl; return 0; } if (col >= static_cast(viennacl::traits::size2(mat)) || col < 0) { std::cerr << "Error in file " << file << " at line " << linenum << ": Column index out of bounds: " << col << " (matrix dim: " << viennacl::traits::size1(mat) << " x " << viennacl::traits::size2(mat) << ")" << std::endl; return 0; } viennacl::traits::fill(mat, row, col, value); //basically equivalent to mat(row, col) = value; if (symmetric) viennacl::traits::fill(mat, col, row, value); //basically equivalent to mat(col, row) = value; if (++valid_entries == nnz) break; } //else dense_format } } } //std::cout << linenum << " lines read." << std::endl; reader.close(); return linenum; } /** @brief Reads a sparse matrix from a file (MatrixMarket format) * * @param mat The matrix that is to be read (ublas-types and std::vector< std::map > are supported) * @param file The filename * @param index_base The index base, typically 1 * @tparam MatrixType A generic matrix type. Type requirements: size1() returns number of rows, size2() returns number columns, operator() writes array entries, resize() allows resizing the matrix. 
* @return Returns nonzero if file is read correctly */ template long read_matrix_market_file(MatrixType & mat, const char * file, long index_base = 1) { return read_matrix_market_file_impl(mat, file, index_base); } template long read_matrix_market_file(MatrixType & mat, const std::string & file, long index_base = 1) { return read_matrix_market_file_impl(mat, file.c_str(), index_base); } template long read_matrix_market_file(std::vector< std::map > & mat, const char * file, long index_base = 1) { viennacl::tools::sparse_matrix_adapter adapted_matrix(mat); return read_matrix_market_file_impl(adapted_matrix, file, index_base); } template long read_matrix_market_file(std::vector< std::map > & mat, const std::string & file, long index_base = 1) { viennacl::tools::sparse_matrix_adapter adapted_matrix(mat); return read_matrix_market_file_impl(adapted_matrix, file.c_str(), index_base); } ////////// writer ///////////// template void write_matrix_market_file_impl(MatrixType const & mat, const char * file, long index_base) { std::ofstream writer(file); long num_entries = 0; for (typename MatrixType::const_iterator1 row_it = mat.begin1(); row_it != mat.end1(); ++row_it) for (typename MatrixType::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) ++num_entries; writer << "%%MatrixMarket matrix coordinate real general" << std::endl; writer << mat.size1() << " " << mat.size2() << " " << num_entries << std::endl; for (typename MatrixType::const_iterator1 row_it = mat.begin1(); row_it != mat.end1(); ++row_it) for (typename MatrixType::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) writer << col_it.index1() + index_base << " " << col_it.index2() + index_base << " " << *col_it << std::endl; writer.close(); } template void write_matrix_market_file(std::vector< std::map > const & mat, const char * file, long index_base = 1) { viennacl::tools::const_sparse_matrix_adapter adapted_matrix(mat); return 
write_matrix_market_file_impl(adapted_matrix, file, index_base); } template void write_matrix_market_file(std::vector< std::map > const & mat, const std::string & file, long index_base = 1) { viennacl::tools::const_sparse_matrix_adapter adapted_matrix(mat); return write_matrix_market_file_impl(adapted_matrix, file.c_str(), index_base); } /** @brief Writes a sparse matrix to a file (MatrixMarket format) * * @param mat The matrix that is to be read (ublas-types and std::vector< std::map > are supported) * @param file The filename * @param index_base The index base, typically 1 * @tparam MatrixType A generic matrix type. Type requirements: size1() returns number of rows, size2() returns number columns, operator() writes array entries, resize() allows resizing the matrix. * @return Returns nonzero if file is read correctly */ template void write_matrix_market_file(MatrixType const & mat, const std::string & file, long index_base = 1) { write_matrix_market_file_impl(mat, file.c_str(), index_base); } } //namespace io } //namespace viennacl #endif ViennaCL-1.5.1-src/viennacl/ell_matrix.hpp000644 001750 001750 00000030333 12267307531 020440 0ustar00rupprupp000000 000000 #ifndef VIENNACL_ELL_MATRIX_HPP_ #define VIENNACL_ELL_MATRIX_HPP_ /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/ell_matrix.hpp @brief Implementation of the ell_matrix class Contributed by Volodymyr Kysenko. 
*/ #include "viennacl/forwards.h" #include "viennacl/vector.hpp" #include "viennacl/tools/tools.hpp" #include "viennacl/linalg/sparse_matrix_operations.hpp" namespace viennacl { /** @brief Sparse matrix class using the ELLPACK format for storing the nonzeros. * * This format works best for matrices where the number of nonzeros per row is mostly the same. * Finite element and finite difference methods on nicely shaped domains often result in such a nonzero pattern. * For a matrix * * (1 2 0 0 0) * (2 3 4 0 0) * (0 5 6 0 7) * (0 0 8 9 0) * * the entries are layed out in chunks of size 3 as * (1 2 5 8; 2 3 6 9; 0 4 7 0) * Note that this is a 'transposed' representation in order to maximize coalesced memory access. */ template class ell_matrix { public: typedef viennacl::backend::mem_handle handle_type; typedef scalar::ResultType> value_type; typedef vcl_size_t size_type; ell_matrix() : rows_(0), cols_(0), maxnnz_(0) {} ell_matrix(viennacl::context ctx) : rows_(0), cols_(0), maxnnz_(0) { coords_.switch_active_handle_id(ctx.memory_type()); elements_.switch_active_handle_id(ctx.memory_type()); #ifdef VIENNACL_WITH_OPENCL if (ctx.memory_type() == OPENCL_MEMORY) { coords_.opencl_handle().context(ctx.opencl_context()); elements_.opencl_handle().context(ctx.opencl_context()); } #endif } public: vcl_size_t internal_size1() const { return viennacl::tools::align_to_multiple(rows_, ALIGNMENT); } vcl_size_t internal_size2() const { return viennacl::tools::align_to_multiple(cols_, ALIGNMENT); } vcl_size_t size1() const { return rows_; } vcl_size_t size2() const { return cols_; } vcl_size_t internal_maxnnz() const {return viennacl::tools::align_to_multiple(maxnnz_, ALIGNMENT); } vcl_size_t maxnnz() const { return maxnnz_; } vcl_size_t nnz() const { return rows_ * maxnnz_; } vcl_size_t internal_nnz() const { return internal_size1() * internal_maxnnz(); } handle_type & handle() { return elements_; } const handle_type & handle() const { return elements_; } handle_type & handle2() { 
return coords_; } const handle_type & handle2() const { return coords_; } #if defined(_MSC_VER) && _MSC_VER < 1500 //Visual Studio 2005 needs special treatment template friend void copy(const CPU_MATRIX & cpu_matrix, ell_matrix & gpu_matrix ); #else template friend void copy(const CPU_MATRIX & cpu_matrix, ell_matrix & gpu_matrix ); #endif private: vcl_size_t rows_; vcl_size_t cols_; vcl_size_t maxnnz_; handle_type coords_; handle_type elements_; }; template void copy(const CPU_MATRIX& cpu_matrix, ell_matrix& gpu_matrix ) { assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); if(cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0) { //determine max capacity for row vcl_size_t max_entries_per_row = 0; for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) { vcl_size_t num_entries = 0; for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) { ++num_entries; } max_entries_per_row = std::max(max_entries_per_row, num_entries); } //setup GPU matrix gpu_matrix.maxnnz_ = max_entries_per_row; gpu_matrix.rows_ = cpu_matrix.size1(); gpu_matrix.cols_ = cpu_matrix.size2(); vcl_size_t nnz = gpu_matrix.internal_nnz(); viennacl::backend::typesafe_host_array coords(gpu_matrix.handle2(), nnz); std::vector elements(nnz, 0); // std::cout << "ELL_MATRIX copy " << gpu_matrix.maxnnz_ << " " << gpu_matrix.rows_ << " " << gpu_matrix.cols_ << " " // << gpu_matrix.internal_maxnnz() << "\n"; for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) { vcl_size_t data_index = 0; for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) { coords.set(gpu_matrix.internal_size1() * data_index + col_it.index1(), 
col_it.index2()); elements[gpu_matrix.internal_size1() * data_index + col_it.index1()] = *col_it; //std::cout << *col_it << "\n"; data_index++; } } viennacl::backend::memory_create(gpu_matrix.handle2(), coords.raw_size(), traits::context(gpu_matrix.handle2()), coords.get()); viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(SCALARTYPE) * elements.size(), traits::context(gpu_matrix.handle()), &(elements[0])); } } template void copy(const ell_matrix& gpu_matrix, CPU_MATRIX& cpu_matrix) { assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); if(gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0) { std::vector elements(gpu_matrix.internal_nnz()); viennacl::backend::typesafe_host_array coords(gpu_matrix.handle2(), gpu_matrix.internal_nnz()); viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE) * elements.size(), &(elements[0])); viennacl::backend::memory_read(gpu_matrix.handle2(), 0, coords.raw_size(), coords.get()); for(vcl_size_t row = 0; row < gpu_matrix.size1(); row++) { for(vcl_size_t ind = 0; ind < gpu_matrix.internal_maxnnz(); ind++) { vcl_size_t offset = gpu_matrix.internal_size1() * ind + row; if(elements[offset] == static_cast(0.0)) continue; if(coords[offset] >= gpu_matrix.size2()) { std::cerr << "ViennaCL encountered invalid data " << offset << " " << ind << " " << row << " " << coords[offset] << " " << gpu_matrix.size2() << std::endl; return; } cpu_matrix(row, coords[offset]) = elements[offset]; } } } } // // Specify available operations: // /** \cond */ namespace linalg { namespace detail { // x = A * y template struct op_executor, op_assign, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { // check for the special case x = A * x if (viennacl::traits::handle(lhs) == 
viennacl::traits::handle(rhs.rhs())) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs = temp; } else viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs); } }; template struct op_executor, op_inplace_add, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs += temp; } }; template struct op_executor, op_inplace_sub, vector_expression, const vector_base, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_base, op_prod> const & rhs) { viennacl::vector temp(lhs); viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); lhs -= temp; } }; // x = A * vec_op template struct op_executor, op_assign, vector_expression, const vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs(), viennacl::traits::context(rhs)); viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs); } }; // x = A * vec_op template struct op_executor, op_inplace_add, vector_expression, const vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs(), viennacl::traits::context(rhs)); viennacl::vector temp_result(lhs); viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); lhs += temp_result; } }; // x = A * vec_op template struct op_executor, op_inplace_sub, vector_expression, const vector_expression, op_prod> > { static void apply(vector_base & lhs, vector_expression, const vector_expression, op_prod> const & rhs) { viennacl::vector temp(rhs.rhs(), viennacl::traits::context(rhs)); viennacl::vector temp_result(lhs); viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); lhs -= temp_result; } }; } // namespace detail } // namespace 
linalg /** \endcond */ } #endif ViennaCL-1.5.1-src/viennacl/scheduler/000755 001750 001750 00000000000 12267307531 017543 5ustar00rupprupp000000 000000 ViennaCL-1.5.1-src/viennacl/scheduler/execute_util.hpp000644 001750 001750 00000023177 12267307531 022765 0ustar00rupprupp000000 000000 #ifndef VIENNACL_SCHEDULER_EXECUTE_UTIL_HPP #define VIENNACL_SCHEDULER_EXECUTE_UTIL_HPP /* ========================================================================= Copyright (c) 2010-2014, Institute for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/scheduler/execute_util.hpp @brief Provides various utilities for implementing the execution of statements */ #include #include "viennacl/forwards.h" #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/scheduler/forwards.h" namespace viennacl { namespace scheduler { namespace detail { // inline lhs_rhs_element const & extract_representative_vector(statement const & s, lhs_rhs_element const & element) { switch (element.type_family) { case VECTOR_TYPE_FAMILY: return element; case COMPOSITE_OPERATION_FAMILY: { statement_node const & leaf = s.array()[element.node_index]; if (leaf.op.type_family == OPERATION_UNARY_TYPE_FAMILY) return extract_representative_vector(s, leaf.lhs); switch (leaf.op.type) { case OPERATION_BINARY_ADD_TYPE: case OPERATION_BINARY_SUB_TYPE: case OPERATION_BINARY_MULT_TYPE: case OPERATION_BINARY_DIV_TYPE: case OPERATION_BINARY_ELEMENT_PROD_TYPE: case OPERATION_BINARY_ELEMENT_DIV_TYPE: return extract_representative_vector(s, 
leaf.lhs); case OPERATION_BINARY_MAT_VEC_PROD_TYPE: return extract_representative_vector(s, leaf.rhs); default: throw statement_not_supported_exception("Vector leaf encountered an invalid binary operation!"); } } default: throw statement_not_supported_exception("Vector leaf encountered an invalid node type!"); } } // helper routines for extracting the scalar type inline float convert_to_float(float f) { return f; } inline float convert_to_float(double d) { return static_cast(d); } inline float convert_to_float(lhs_rhs_element const & el) { if (el.type_family == SCALAR_TYPE_FAMILY && el.subtype == HOST_SCALAR_TYPE && el.numeric_type == FLOAT_TYPE) return el.host_float; if (el.type_family == SCALAR_TYPE_FAMILY && el.subtype == DEVICE_SCALAR_TYPE && el.numeric_type == FLOAT_TYPE) return *el.scalar_float; throw statement_not_supported_exception("Cannot convert to float"); } // helper routines for extracting the scalar type inline double convert_to_double(float d) { return static_cast(d); } inline double convert_to_double(double d) { return d; } inline double convert_to_double(lhs_rhs_element const & el) { if (el.type_family == SCALAR_TYPE_FAMILY && el.subtype == HOST_SCALAR_TYPE && el.numeric_type == DOUBLE_TYPE) return el.host_double; if (el.type_family == SCALAR_TYPE_FAMILY && el.subtype == DEVICE_SCALAR_TYPE && el.numeric_type == DOUBLE_TYPE) return *el.scalar_double; throw statement_not_supported_exception("Cannot convert to double"); } /////////////////// Create/Destory temporary vector /////////////////////// inline void new_element(lhs_rhs_element & new_elem, lhs_rhs_element const & old_element) { new_elem.type_family = old_element.type_family; new_elem.subtype = old_element.subtype; new_elem.numeric_type = old_element.numeric_type; if (new_elem.type_family == SCALAR_TYPE_FAMILY) { assert(new_elem.subtype == DEVICE_SCALAR_TYPE && bool("Expected a device scalar in root node")); switch (new_elem.numeric_type) { case FLOAT_TYPE: new_elem.scalar_float = new 
viennacl::scalar(); return; case DOUBLE_TYPE: new_elem.scalar_double = new viennacl::scalar(); return; default: throw statement_not_supported_exception("Invalid vector type for vector construction"); } } else if (new_elem.type_family == VECTOR_TYPE_FAMILY) { assert(new_elem.subtype == DENSE_VECTOR_TYPE && bool("Expected a dense vector in root node")); switch (new_elem.numeric_type) { case FLOAT_TYPE: new_elem.vector_float = new viennacl::vector((old_element.vector_float)->size()); return; case DOUBLE_TYPE: new_elem.vector_double = new viennacl::vector((old_element.vector_float)->size()); return; default: throw statement_not_supported_exception("Invalid vector type for vector construction"); } } else if (new_elem.type_family == MATRIX_TYPE_FAMILY) { assert( (new_elem.subtype == DENSE_COL_MATRIX_TYPE || new_elem.subtype == DENSE_ROW_MATRIX_TYPE) && bool("Expected a dense matrix in root node")); if (new_elem.subtype == DENSE_COL_MATRIX_TYPE) { switch (new_elem.numeric_type) { case FLOAT_TYPE: new_elem.matrix_col_float = new viennacl::matrix((old_element.matrix_col_float)->size1(), (old_element.matrix_col_float)->size2()); return; case DOUBLE_TYPE: new_elem.matrix_col_double = new viennacl::matrix((old_element.matrix_col_double)->size1(), (old_element.matrix_col_double)->size2()); return; default: throw statement_not_supported_exception("Invalid vector type for vector construction"); } } else if (new_elem.subtype == DENSE_ROW_MATRIX_TYPE) { switch (new_elem.numeric_type) { case FLOAT_TYPE: new_elem.matrix_row_float = new viennacl::matrix((old_element.matrix_row_float)->size1(), (old_element.matrix_row_float)->size2()); return; case DOUBLE_TYPE: new_elem.matrix_row_double = new viennacl::matrix((old_element.matrix_row_double)->size1(), (old_element.matrix_row_double)->size2()); return; default: throw statement_not_supported_exception("Invalid vector type for vector construction"); } } else throw statement_not_supported_exception("Expected a dense matrix in root node 
when creating a temporary"); } else throw statement_not_supported_exception("Unknown type familty when creating new temporary object"); } inline void delete_element(lhs_rhs_element & elem) { if (elem.type_family == SCALAR_TYPE_FAMILY) { switch (elem.numeric_type) { case FLOAT_TYPE: delete elem.scalar_float; return; case DOUBLE_TYPE: delete elem.scalar_double; return; default: throw statement_not_supported_exception("Invalid vector type for vector destruction"); } } else if (elem.type_family == VECTOR_TYPE_FAMILY) { switch (elem.numeric_type) { case FLOAT_TYPE: delete elem.vector_float; return; case DOUBLE_TYPE: delete elem.vector_double; return; default: throw statement_not_supported_exception("Invalid vector type for vector destruction"); } } else if (elem.type_family == MATRIX_TYPE_FAMILY) { if (elem.subtype == DENSE_COL_MATRIX_TYPE) { switch (elem.numeric_type) { case FLOAT_TYPE: delete elem.matrix_col_float; return; case DOUBLE_TYPE: delete elem.matrix_col_double; return; default: throw statement_not_supported_exception("Invalid vector type for vector destruction"); } } else if (elem.subtype == DENSE_ROW_MATRIX_TYPE) { switch (elem.numeric_type) { case FLOAT_TYPE: delete elem.matrix_row_float; return; case DOUBLE_TYPE: delete elem.matrix_row_double; return; default: throw statement_not_supported_exception("Invalid vector type for vector destruction"); } } else throw statement_not_supported_exception("Expected a dense matrix in root node when deleting temporary"); } else throw statement_not_supported_exception("Unknown type familty when deleting temporary object"); } } // namespace detail } // namespace scheduler } // namespace viennacl #endif ViennaCL-1.5.1-src/viennacl/scheduler/forwards.h000644 001750 001750 00000105146 12267307531 021552 0ustar00rupprupp000000 000000 #ifndef VIENNACL_SCHEDULER_STATEMENT_HPP #define VIENNACL_SCHEDULER_STATEMENT_HPP /* ========================================================================= Copyright (c) 2010-2014, Institute 
for Microelectronics, Institute for Analysis and Scientific Computing, TU Wien. Portions of this software are copyright by UChicago Argonne, LLC. ----------------- ViennaCL - The Vienna Computing Library ----------------- Project Head: Karl Rupp rupp@iue.tuwien.ac.at (A list of authors and contributors can be found in the PDF manual) License: MIT (X11), see file LICENSE in the base directory ============================================================================= */ /** @file viennacl/scheduler/forwards.h @brief Provides the datastructures for dealing with a single statement such as 'x = y + z;' */ #include "viennacl/forwards.h" #include namespace viennacl { namespace scheduler { /** @brief Exception for the case the scheduler is unable to deal with the operation */ class statement_not_supported_exception : public std::exception { public: statement_not_supported_exception() : message_() {} statement_not_supported_exception(std::string message) : message_("ViennaCL: Internal error: The scheduler encountered a problem with the operation provided: " + message) {} virtual const char* what() const throw() { return message_.c_str(); } virtual ~statement_not_supported_exception() throw() {} private: std::string message_; }; /** @brief Optimization enum for grouping operations into unary or binary operations. Just for optimization of lookups. 
*/ enum operation_node_type_family { OPERATION_INVALID_TYPE_FAMILY = 0, // unary or binary expression OPERATION_UNARY_TYPE_FAMILY, OPERATION_BINARY_TYPE_FAMILY }; /** @brief Enumeration for identifying the possible operations */ enum operation_node_type { OPERATION_INVALID_TYPE = 0, // unary expression OPERATION_UNARY_ABS_TYPE, OPERATION_UNARY_ACOS_TYPE, OPERATION_UNARY_ASIN_TYPE, OPERATION_UNARY_ATAN_TYPE, OPERATION_UNARY_CEIL_TYPE, OPERATION_UNARY_COS_TYPE, OPERATION_UNARY_COSH_TYPE, OPERATION_UNARY_EXP_TYPE, OPERATION_UNARY_FABS_TYPE, OPERATION_UNARY_FLOOR_TYPE, OPERATION_UNARY_LOG_TYPE, OPERATION_UNARY_LOG10_TYPE, OPERATION_UNARY_SIN_TYPE, OPERATION_UNARY_SINH_TYPE, OPERATION_UNARY_SQRT_TYPE, OPERATION_UNARY_TAN_TYPE, OPERATION_UNARY_TANH_TYPE, OPERATION_UNARY_TRANS_TYPE, OPERATION_UNARY_NORM_1_TYPE, OPERATION_UNARY_NORM_2_TYPE, OPERATION_UNARY_NORM_INF_TYPE, // binary expression OPERATION_BINARY_ACCESS_TYPE, OPERATION_BINARY_ASSIGN_TYPE, OPERATION_BINARY_INPLACE_ADD_TYPE, OPERATION_BINARY_INPLACE_SUB_TYPE, OPERATION_BINARY_ADD_TYPE, OPERATION_BINARY_SUB_TYPE, OPERATION_BINARY_MAT_VEC_PROD_TYPE, OPERATION_BINARY_MAT_MAT_PROD_TYPE, OPERATION_BINARY_MULT_TYPE, // scalar times vector/matrix OPERATION_BINARY_DIV_TYPE, // vector/matrix divided by scalar OPERATION_BINARY_ELEMENT_PROD_TYPE, OPERATION_BINARY_ELEMENT_DIV_TYPE, OPERATION_BINARY_INNER_PROD_TYPE }; namespace result_of { /** @brief Helper metafunction for obtaining the operation ID as well as the operation family for unary and binary operations on vectors and matrices. 
*/ template struct op_type_info { typedef typename T::ERROR_UNKNOWN_OP_TYPE error_type; }; /** \cond */ // unary operations template <> struct op_type_info > { enum { id = OPERATION_UNARY_ABS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY }; }; template <> struct op_type_info > { enum { id = OPERATION_UNARY_ACOS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY }; }; template <> struct op_type_info > { enum { id = OPERATION_UNARY_ASIN_TYPE, family = OPERATION_UNARY_TYPE_FAMILY }; }; template <> struct op_type_info > { enum { id = OPERATION_UNARY_ATAN_TYPE, family = OPERATION_UNARY_TYPE_FAMILY }; }; template <> struct op_type_info > { enum { id = OPERATION_UNARY_CEIL_TYPE, family = OPERATION_UNARY_TYPE_FAMILY }; }; template <> struct op_type_info > { enum { id = OPERATION_UNARY_COS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY }; }; template <> struct op_type_info > { enum { id = OPERATION_UNARY_COSH_TYPE, family = OPERATION_UNARY_TYPE_FAMILY }; }; template <> struct op_type_info > { enum { id = OPERATION_UNARY_EXP_TYPE, family = OPERATION_UNARY_TYPE_FAMILY }; }; template <> struct op_type_info > { enum { id = OPERATION_UNARY_FABS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY }; }; template <> struct op_type_info > { enum { id = OPERATION_UNARY_FLOOR_TYPE, family = OPERATION_UNARY_TYPE_FAMILY }; }; template <> struct op_type_info > { enum { id = OPERATION_UNARY_LOG_TYPE, family = OPERATION_UNARY_TYPE_FAMILY }; }; template <> struct op_type_info > { enum { id = OPERATION_UNARY_LOG10_TYPE, family = OPERATION_UNARY_TYPE_FAMILY }; }; template <> struct op_type_info > { enum { id = OPERATION_UNARY_SIN_TYPE, family = OPERATION_UNARY_TYPE_FAMILY }; }; template <> struct op_type_info > { enum { id = OPERATION_UNARY_SINH_TYPE, family = OPERATION_UNARY_TYPE_FAMILY }; }; template <> struct op_type_info > { enum { id = OPERATION_UNARY_SQRT_TYPE, family = OPERATION_UNARY_TYPE_FAMILY }; }; template <> struct op_type_info > { enum { id = OPERATION_UNARY_TAN_TYPE, family = 
OPERATION_UNARY_TYPE_FAMILY }; }; template <> struct op_type_info > { enum { id = OPERATION_UNARY_TANH_TYPE, family = OPERATION_UNARY_TYPE_FAMILY }; }; template <> struct op_type_info { enum { id = OPERATION_UNARY_NORM_1_TYPE, family = OPERATION_UNARY_TYPE_FAMILY }; }; template <> struct op_type_info { enum { id = OPERATION_UNARY_NORM_2_TYPE, family = OPERATION_UNARY_TYPE_FAMILY }; }; template <> struct op_type_info { enum { id = OPERATION_UNARY_NORM_INF_TYPE, family = OPERATION_UNARY_TYPE_FAMILY }; }; template <> struct op_type_info { enum { id = OPERATION_UNARY_TRANS_TYPE, family = OPERATION_UNARY_TYPE_FAMILY }; }; // binary operations template <> struct op_type_info { enum { id = OPERATION_BINARY_ASSIGN_TYPE, family = OPERATION_BINARY_TYPE_FAMILY }; }; template <> struct op_type_info { enum { id = OPERATION_BINARY_INPLACE_ADD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY }; }; template <> struct op_type_info { enum { id = OPERATION_BINARY_INPLACE_SUB_TYPE, family = OPERATION_BINARY_TYPE_FAMILY }; }; template <> struct op_type_info { enum { id = OPERATION_BINARY_ADD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY }; }; template <> struct op_type_info { enum { id = OPERATION_BINARY_SUB_TYPE, family = OPERATION_BINARY_TYPE_FAMILY }; }; template <> struct op_type_info { enum { id = OPERATION_BINARY_MAT_VEC_PROD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY }; }; template <> struct op_type_info { enum { id = OPERATION_BINARY_MAT_MAT_PROD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY }; }; template <> struct op_type_info { enum { id = OPERATION_BINARY_MULT_TYPE, family = OPERATION_BINARY_TYPE_FAMILY }; }; template <> struct op_type_info { enum { id = OPERATION_BINARY_DIV_TYPE, family = OPERATION_BINARY_TYPE_FAMILY }; }; template <> struct op_type_info > { enum { id = OPERATION_BINARY_ELEMENT_PROD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY }; }; template <> struct op_type_info > { enum { id = OPERATION_BINARY_ELEMENT_DIV_TYPE, family = OPERATION_BINARY_TYPE_FAMILY }; }; 
template <> struct op_type_info { enum { id = OPERATION_BINARY_INNER_PROD_TYPE, family = OPERATION_BINARY_TYPE_FAMILY }; }; /** \endcond */ } // namespace result_of /** @brief Groups the type of a node in the statement tree. Used for faster dispatching */ enum statement_node_type_family { INVALID_TYPE_FAMILY = 0, // LHS or RHS are again an expression: COMPOSITE_OPERATION_FAMILY, // device scalars: SCALAR_TYPE_FAMILY, // vector: VECTOR_TYPE_FAMILY, // matrices: MATRIX_TYPE_FAMILY }; /** @brief Encodes the type of a node in the statement tree. */ enum statement_node_subtype { INVALID_SUBTYPE = 0, //when type is COMPOSITE_OPERATION_FAMILY HOST_SCALAR_TYPE, DEVICE_SCALAR_TYPE, DENSE_VECTOR_TYPE, IMPLICIT_VECTOR_TYPE, DENSE_ROW_MATRIX_TYPE, DENSE_COL_MATRIX_TYPE, IMPLICIT_MATRIX_TYPE, COMPRESSED_MATRIX_TYPE, COORDINATE_MATRIX_TYPE, ELL_MATRIX_TYPE, HYB_MATRIX_TYPE // other matrix types to be added here }; /** @brief Encodes the type of a node in the statement tree. */ enum statement_node_numeric_type { INVALID_NUMERIC_TYPE = 0, //when type is COMPOSITE_OPERATION_FAMILY CHAR_TYPE, UCHAR_TYPE, SHORT_TYPE, USHORT_TYPE, INT_TYPE, UINT_TYPE, LONG_TYPE, ULONG_TYPE, HALF_TYPE, FLOAT_TYPE, DOUBLE_TYPE }; namespace result_of { ///////////// numeric type ID deduction ///////////// /** @brief Helper metafunction for obtaining the runtime type ID for a numerical type */ template struct numeric_type_id {}; /** \cond */ template <> struct numeric_type_id { enum { value = CHAR_TYPE }; }; template <> struct numeric_type_id { enum { value = UCHAR_TYPE }; }; template <> struct numeric_type_id { enum { value = SHORT_TYPE }; }; template <> struct numeric_type_id { enum { value = USHORT_TYPE }; }; template <> struct numeric_type_id { enum { value = INT_TYPE }; }; template <> struct numeric_type_id { enum { value = UINT_TYPE }; }; template <> struct numeric_type_id { enum { value = LONG_TYPE }; }; template <> struct numeric_type_id { enum { value = ULONG_TYPE }; }; template <> struct 
numeric_type_id { enum { value = FLOAT_TYPE }; }; template <> struct numeric_type_id { enum { value = DOUBLE_TYPE }; }; /** \endcond */ ///////////// matrix layout ID deduction ///////////// /** @brief Helper metafunction for obtaining the memory layout (row-/column-major) for a matrix. */ template struct layout_type_id {}; /** \cond */ template <> struct layout_type_id { enum { value = DENSE_COL_MATRIX_TYPE }; }; template <> struct layout_type_id { enum { value = DENSE_ROW_MATRIX_TYPE }; }; /** \endcond */ } /** @brief A class representing the 'data' for the LHS or RHS operand of the respective node. * * If it represents a compound expression, the union holds the array index within the respective statement array. * If it represents a object with data (vector, matrix, etc.) it holds the respective pointer (scalar, vector, matrix) or value (host scalar) * * The member 'type_family' is an optimization for quickly retrieving the 'type', which denotes the currently 'active' member in the union */ struct lhs_rhs_element { statement_node_type_family type_family; statement_node_subtype subtype; statement_node_numeric_type numeric_type; union { /////// Case 1: Node is another compound expression: vcl_size_t node_index; /////// Case 2: Node is a leaf, hence carries an operand: // host scalars: char host_char; unsigned char host_uchar; short host_short; unsigned short host_ushort; int host_int; unsigned int host_uint; long host_long; unsigned long host_ulong; float host_float; double host_double; // Note: ViennaCL types have potentially expensive copy-CTORs, hence using pointers: // scalars: //viennacl::scalar *scalar_char; //viennacl::scalar *scalar_uchar; //viennacl::scalar *scalar_short; //viennacl::scalar *scalar_ushort; //viennacl::scalar *scalar_int; //viennacl::scalar *scalar_uint; //viennacl::scalar *scalar_long; //viennacl::scalar *scalar_ulong; viennacl::scalar *scalar_float; viennacl::scalar *scalar_double; // vectors: //viennacl::vector_base *vector_char; 
//viennacl::vector_base *vector_uchar; //viennacl::vector_base *vector_short; //viennacl::vector_base *vector_ushort; //viennacl::vector_base *vector_int; //viennacl::vector_base *vector_uint; //viennacl::vector_base *vector_long; //viennacl::vector_base *vector_ulong; viennacl::vector_base *vector_float; viennacl::vector_base *vector_double; // implicit vectors: //viennacl::implicit_vector_base *implicit_vector_char; //viennacl::implicit_vector_base *implicit_vector_uchar; //viennacl::implicit_vector_base *implicit_vector_short; //viennacl::implicit_vector_base *implicit_vector_ushort; //viennacl::implicit_vector_base *implicit_vector_int; //viennacl::implicit_vector_base *implicit_vector_uint; //viennacl::implicit_vector_base *implicit_vector_long; //viennacl::implicit_vector_base *implicit_vector_ulong; viennacl::implicit_vector_base *implicit_vector_float; viennacl::implicit_vector_base *implicit_vector_double; // row-major matrices: //viennacl::matrix_base *matrix_row_char; //viennacl::matrix_base *matrix_row_uchar; //viennacl::matrix_base *matrix_row_short; //viennacl::matrix_base *matrix_row_ushort; //viennacl::matrix_base *matrix_row_int; //viennacl::matrix_base *matrix_row_uint; //viennacl::matrix_base *matrix_row_long; //viennacl::matrix_base *matrix_row_ulong; viennacl::matrix_base *matrix_row_float; viennacl::matrix_base *matrix_row_double; // column-major matrices: //viennacl::matrix_base *matrix_col_char; //viennacl::matrix_base *matrix_col_uchar; //viennacl::matrix_base *matrix_col_short; //viennacl::matrix_base *matrix_col_ushort; //viennacl::matrix_base *matrix_col_int; //viennacl::matrix_base *matrix_col_uint; //viennacl::matrix_base *matrix_col_long; //viennacl::matrix_base *matrix_col_ulong; viennacl::matrix_base *matrix_col_float; viennacl::matrix_base *matrix_col_double; //viennacl::implicit_matrix_base *implicit_matrix_char; //viennacl::implicit_matrix_base *implicit_matrix_uchar; //viennacl::implicit_matrix_base *implicit_matrix_short; 
//viennacl::implicit_matrix_base *implicit_matrix_ushort; //viennacl::implicit_matrix_base *implicit_matrix_int; //viennacl::implicit_matrix_base *implicit_matrix_uint; //viennacl::implicit_matrix_base *implicit_matrix_long; //viennacl::implicit_matrix_base *implicit_matrix_ulong; viennacl::implicit_matrix_base *implicit_matrix_float; viennacl::implicit_matrix_base *implicit_matrix_double; //viennacl::compressed_matrix *compressed_matrix_char; //viennacl::compressed_matrix *compressed_matrix_uchar; //viennacl::compressed_matrix *compressed_matrix_short; //viennacl::compressed_matrix *compressed_matrix_ushort; //viennacl::compressed_matrix *compressed_matrix_int; //viennacl::compressed_matrix *compressed_matrix_uint; //viennacl::compressed_matrix *compressed_matrix_long; //viennacl::compressed_matrix *compressed_matrix_ulong; viennacl::compressed_matrix *compressed_matrix_float; viennacl::compressed_matrix *compressed_matrix_double; //viennacl::coordinate_matrix *coordinate_matrix_char; //viennacl::coordinate_matrix *coordinate_matrix_uchar; //viennacl::coordinate_matrix *coordinate_matrix_short; //viennacl::coordinate_matrix *coordinate_matrix_ushort; //viennacl::coordinate_matrix *coordinate_matrix_int; //viennacl::coordinate_matrix *coordinate_matrix_uint; //viennacl::coordinate_matrix *coordinate_matrix_long; //viennacl::coordinate_matrix *coordinate_matrix_ulong; viennacl::coordinate_matrix *coordinate_matrix_float; viennacl::coordinate_matrix *coordinate_matrix_double; //viennacl::ell_matrix *ell_matrix_char; //viennacl::ell_matrix *ell_matrix_uchar; //viennacl::ell_matrix *ell_matrix_short; //viennacl::ell_matrix *ell_matrix_ushort; //viennacl::ell_matrix *ell_matrix_int; //viennacl::ell_matrix *ell_matrix_uint; //viennacl::ell_matrix *ell_matrix_long; //viennacl::ell_matrix *ell_matrix_ulong; viennacl::ell_matrix *ell_matrix_float; viennacl::ell_matrix *ell_matrix_double; //viennacl::hyb_matrix *hyb_matrix_char; //viennacl::hyb_matrix *hyb_matrix_uchar; 
//viennacl::hyb_matrix *hyb_matrix_short; //viennacl::hyb_matrix *hyb_matrix_ushort; //viennacl::hyb_matrix *hyb_matrix_int; //viennacl::hyb_matrix *hyb_matrix_uint; //viennacl::hyb_matrix *hyb_matrix_long; //viennacl::hyb_matrix *hyb_matrix_ulong; viennacl::hyb_matrix *hyb_matrix_float; viennacl::hyb_matrix *hyb_matrix_double; }; }; /** @brief Struct for holding the type family as well as the type of an operation (could be addition, subtraction, norm, etc.) */ struct op_element { operation_node_type_family type_family; operation_node_type type; }; /** @brief Main datastructure for an node in the statement tree */ struct statement_node { lhs_rhs_element lhs; op_element op; lhs_rhs_element rhs; }; namespace result_of { /** @brief Helper metafunction for obtaining the number of nodes of an expression template tree. */ template struct num_nodes { enum { value = 0 }; }; /** \cond */ template struct num_nodes< vector_expression > { enum { value = 1 + num_nodes::value + num_nodes::value }; }; template struct num_nodes< const vector_expression > { enum { value = 1 + num_nodes::value + num_nodes::value }; }; template struct num_nodes< matrix_expression > { enum { value = 1 + num_nodes::value + num_nodes::value }; }; template struct num_nodes< const matrix_expression > { enum { value = 1 + num_nodes::value + num_nodes::value }; }; template struct num_nodes< scalar_expression > { enum { value = 1 + num_nodes::value + num_nodes::value }; }; template struct num_nodes< const scalar_expression > { enum { value = 1 + num_nodes::value + num_nodes::value }; }; /** \endcond */ } /** \brief The main class for representing a statement such as x = inner_prod(y,z); at runtime. * * This is the equivalent to an expression template tree, but entirely built at runtime in order to perform really cool stuff such as kernel fusion. 
*/ class statement { public: typedef statement_node value_type; typedef viennacl::vcl_size_t size_type; typedef std::vector container_type; statement(container_type const & custom_array) : array_(custom_array) {} /** @brief Generate the runtime statement from an expression template. * * Constructing a runtime statement from expression templates makes perfect sense, because this way only a single allocation is needed when creating the statement. */ template statement(LHS & lhs, OP const &, RHS const & rhs) : array_(1 + result_of::num_nodes::value) { // set OP: array_[0].op.type_family = operation_node_type_family(result_of::op_type_info::family); array_[0].op.type = operation_node_type(result_of::op_type_info::id); // set LHS: add_lhs(0, 1, lhs); // set RHS: add_rhs(0, 1, rhs); } container_type const & array() const { return array_; } size_type root() const { return 0; } private: ///////////// Scalar node helper //////////////// // TODO: add integer vector overloads here void assign_element(lhs_rhs_element & elem, viennacl::scalar const & t) { elem.scalar_float = const_cast *>(&t); } void assign_element(lhs_rhs_element & elem, viennacl::scalar const & t) { elem.scalar_double = const_cast *>(&t); } ///////////// Vector node helper //////////////// // TODO: add integer vector overloads here void assign_element(lhs_rhs_element & elem, viennacl::vector_base const & t) { elem.vector_float = const_cast *>(&t); } void assign_element(lhs_rhs_element & elem, viennacl::vector_base const & t) { elem.vector_double = const_cast *>(&t); } ///////////// Matrix node helper //////////////// // TODO: add integer matrix overloads here void assign_element(lhs_rhs_element & elem, viennacl::matrix_base const & t) { elem.matrix_col_float = const_cast *>(&t); } void assign_element(lhs_rhs_element & elem, viennacl::matrix_base const & t) { elem.matrix_row_float = const_cast *>(&t); } void assign_element(lhs_rhs_element & elem, viennacl::matrix_base const & t) { elem.matrix_col_double = 
const_cast *>(&t); } void assign_element(lhs_rhs_element & elem, viennacl::matrix_base const & t) { elem.matrix_row_double = const_cast *>(&t); } void assign_element(lhs_rhs_element & elem, viennacl::compressed_matrix const & m) { elem.compressed_matrix_float = const_cast *>(&m); } void assign_element(lhs_rhs_element & elem, viennacl::compressed_matrix const & m) { elem.compressed_matrix_double = const_cast *>(&m); } void assign_element(lhs_rhs_element & elem, viennacl::coordinate_matrix const & m) { elem.coordinate_matrix_float = const_cast *>(&m); } void assign_element(lhs_rhs_element & elem, viennacl::coordinate_matrix const & m) { elem.coordinate_matrix_double = const_cast *>(&m); } void assign_element(lhs_rhs_element & elem, viennacl::ell_matrix const & m) { elem.ell_matrix_float = const_cast *>(&m); } void assign_element(lhs_rhs_element & elem, viennacl::ell_matrix const & m) { elem.ell_matrix_double = const_cast *>(&m); } void assign_element(lhs_rhs_element & elem, viennacl::hyb_matrix const & m) { elem.hyb_matrix_float = const_cast *>(&m); } void assign_element(lhs_rhs_element & elem, viennacl::hyb_matrix const & m) { elem.hyb_matrix_double = const_cast *>(&m); } //////////// Tree leaves (terminals) //////////////////// vcl_size_t add_element(vcl_size_t next_free, lhs_rhs_element & elem, float const & t) { elem.type_family = SCALAR_TYPE_FAMILY; elem.subtype = HOST_SCALAR_TYPE; elem.numeric_type = FLOAT_TYPE; elem.host_float = t; return next_free; } vcl_size_t add_element(vcl_size_t next_free, lhs_rhs_element & elem, double const & t) { elem.type_family = SCALAR_TYPE_FAMILY; elem.subtype = HOST_SCALAR_TYPE; elem.numeric_type = DOUBLE_TYPE; elem.host_double = t; return next_free; } template vcl_size_t add_element(vcl_size_t next_free, lhs_rhs_element & elem, viennacl::scalar const & t) { elem.type_family = SCALAR_TYPE_FAMILY; elem.subtype = DEVICE_SCALAR_TYPE; elem.numeric_type = statement_node_numeric_type(result_of::numeric_type_id::value); 
assign_element(elem, t); return next_free; } template vcl_size_t add_element(vcl_size_t next_free, lhs_rhs_element & elem, viennacl::vector_base const & t) { elem.type_family = VECTOR_TYPE_FAMILY; elem.subtype = DENSE_VECTOR_TYPE; elem.numeric_type = statement_node_numeric_type(result_of::numeric_type_id::value); assign_element(elem, t); return next_free; } template vcl_size_t add_element(vcl_size_t next_free, lhs_rhs_element & elem, viennacl::matrix_base const & t) { elem.type_family = MATRIX_TYPE_FAMILY; elem.subtype = statement_node_subtype(result_of::layout_type_id::value); elem.numeric_type = statement_node_numeric_type(result_of::numeric_type_id::value); assign_element(elem, t); return next_free; } template vcl_size_t add_element(vcl_size_t next_free, lhs_rhs_element & elem, viennacl::compressed_matrix const & t) { elem.type_family = MATRIX_TYPE_FAMILY; elem.subtype = COMPRESSED_MATRIX_TYPE; elem.numeric_type = statement_node_numeric_type(result_of::numeric_type_id::value); assign_element(elem, t); return next_free; } template vcl_size_t add_element(vcl_size_t next_free, lhs_rhs_element & elem, viennacl::coordinate_matrix const & t) { elem.type_family = MATRIX_TYPE_FAMILY; elem.subtype = COORDINATE_MATRIX_TYPE; elem.numeric_type = statement_node_numeric_type(result_of::numeric_type_id::value); assign_element(elem, t); return next_free; } template vcl_size_t add_element(vcl_size_t next_free, lhs_rhs_element & elem, viennacl::ell_matrix const & t) { elem.type_family = MATRIX_TYPE_FAMILY; elem.subtype = ELL_MATRIX_TYPE; elem.numeric_type = statement_node_numeric_type(result_of::numeric_type_id::value); assign_element(elem, t); return next_free; } template vcl_size_t add_element(vcl_size_t next_free, lhs_rhs_element & elem, viennacl::hyb_matrix const & t) { elem.type_family = MATRIX_TYPE_FAMILY; elem.subtype = HYB_MATRIX_TYPE; elem.numeric_type = statement_node_numeric_type(result_of::numeric_type_id::value); assign_element(elem, t); return next_free; } 
//////////// Tree nodes (non-terminals) //////////////////// template vcl_size_t add_element(vcl_size_t next_free, lhs_rhs_element & elem, viennacl::scalar_expression const & t) { elem.type_family = COMPOSITE_OPERATION_FAMILY; elem.subtype = INVALID_SUBTYPE; elem.numeric_type = INVALID_NUMERIC_TYPE; elem.node_index = next_free; return add_node(next_free, next_free + 1, t); } template vcl_size_t add_element(vcl_size_t next_free, lhs_rhs_element & elem, viennacl::vector_expression const & t) { elem.type_family = COMPOSITE_OPERATION_FAMILY; elem.subtype = INVALID_SUBTYPE; elem.numeric_type = INVALID_NUMERIC_TYPE; elem.node_index = next_free; return add_node(next_free, next_free + 1, t); } template vcl_size_t add_element(vcl_size_t next_free, lhs_rhs_element & elem, viennacl::matrix_expression const & t) { elem.type_family = COMPOSITE_OPERATION_FAMILY; elem.subtype = INVALID_SUBTYPE; elem.numeric_type = INVALID_NUMERIC_TYPE; elem.node_index = next_free; return add_node(next_free, next_free + 1, t); } //////////// Helper routines //////////////////// template vcl_size_t add_lhs(vcl_size_t current_index, vcl_size_t next_free, T const & t) { return add_element(next_free, array_[current_index].lhs, t); } template vcl_size_t add_rhs(vcl_size_t current_index, vcl_size_t next_free, T const & t) { return add_element(next_free, array_[current_index].rhs, t); } //////////// Internal interfaces //////////////////// template