pax_global_header00006660000000000000000000000064131624625430014520gustar00rootroot0000000000000052 comment=89ede3ba90c906a8ec6b9a0f4bef188ba5bb2fd8 .gitignore000066400000000000000000000000071316246254300130510ustar00rootroot00000000000000build/ .travis.yml000066400000000000000000000034051316246254300131770ustar00rootroot00000000000000language: c compiler: - gcc - clang cache: directories: - $HOME/OpenBlasInstall sudo: false env: - TORCH_LUA_VERSION=LUAJIT21 - TORCH_LUA_VERSION=LUA51 - TORCH_LUA_VERSION=LUA52 os: - linux addons: apt: packages: - cmake - gfortran - gcc-multilib - gfortran-multilib - liblapack-dev - build-essential - gcc - g++ - curl - cmake - libreadline-dev - git-core - libqt4-core - libqt4-gui - libqt4-dev - libjpeg-dev - libpng-dev - ncurses-dev - imagemagick - libzmq3-dev - gfortran - unzip - gnuplot - gnuplot-x11 before_script: - export ROOT_TRAVIS_DIR=$(pwd) - export INSTALL_PREFIX=~/torch/install - ls $HOME/OpenBlasInstall/lib || (cd /tmp/ && git clone https://github.com/xianyi/OpenBLAS.git -b master && cd OpenBLAS && (make NO_AFFINITY=1 -j$(getconf _NPROCESSORS_ONLN) 2>/dev/null >/dev/null) && make PREFIX=$HOME/OpenBlasInstall install) - git clone https://github.com/torch/distro.git ~/torch --recursive - cd ~/torch && git submodule update --init --recursive - mkdir build && cd build - export CMAKE_LIBRARY_PATH=$HOME/OpenBlasInstall/include:$HOME/OpenBlasInstall/lib:$CMAKE_LIBRARY_PATH - cmake .. 
-DCMAKE_INSTALL_PREFIX="${INSTALL_PREFIX}" -DCMAKE_BUILD_TYPE=Release -DWITH_${TORCH_LUA_VERSION}=ON - make && make install - cd $ROOT_TRAVIS_DIR - export LD_LIBRARY_PATH=${INSTALL_PREFIX}/lib:$LD_LIBRARY_PATH script: - ${INSTALL_PREFIX}/bin/luarocks make rocks/torch-scm-1.rockspec - ${INSTALL_PREFIX}/bin/luarocks install luaffi - export PATH=${INSTALL_PREFIX}/bin:$PATH - export TESTLUA=$(which luajit lua | head -n 1) - ${TESTLUA} -ltorch -e "t=torch.test(); if t.errors[1] then os.exit(1) end" - cd test - ${TESTLUA} test_writeObject.lua - ${TESTLUA} test_Tester.lua CMakeLists.txt000066400000000000000000000060661316246254300136340ustar00rootroot00000000000000IF(APPLE) CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12 FATAL_ERROR) CMAKE_POLICY(VERSION 2.8.12) ELSE() CMAKE_MINIMUM_REQUIRED(VERSION 2.8 FATAL_ERROR) CMAKE_POLICY(VERSION 2.8) ENDIF() SET(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_MODULE_PATH}") IF (NOT MSVC) IF (MINGW) SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=format") ELSE() SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=implicit-function-declaration -Werror=format") ENDIF(MINGW) ENDIF(NOT MSVC) # Flags # When using MSVC IF(MSVC) # we want to respect the standard, and we are bored of those **** . ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE=1) ENDIF(MSVC) # OpenMP support? 
SET(WITH_OPENMP ON CACHE BOOL "OpenMP support if available?") IF (APPLE AND CMAKE_COMPILER_IS_GNUCC) EXEC_PROGRAM (uname ARGS -v OUTPUT_VARIABLE DARWIN_VERSION) STRING (REGEX MATCH "[0-9]+" DARWIN_VERSION ${DARWIN_VERSION}) MESSAGE (STATUS "MAC OS Darwin Version: ${DARWIN_VERSION}") IF (DARWIN_VERSION GREATER 9) SET(APPLE_OPENMP_SUCKS 1) ENDIF (DARWIN_VERSION GREATER 9) EXECUTE_PROCESS (COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) IF (APPLE_OPENMP_SUCKS AND GCC_VERSION VERSION_LESS 4.6.2) MESSAGE(STATUS "Warning: Disabling OpenMP (unstable with this version of GCC)") MESSAGE(STATUS " Install GCC >= 4.6.2 or change your OS to enable OpenMP") SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unknown-pragmas") SET(WITH_OPENMP OFF CACHE BOOL "OpenMP support if available?" FORCE) ENDIF () ENDIF () IF (WITH_OPENMP) FIND_PACKAGE(OpenMP) IF(OPENMP_FOUND) MESSAGE(STATUS "Compiling with OpenMP support") SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") ENDIF(OPENMP_FOUND) ENDIF (WITH_OPENMP) # Includes INCLUDE(TorchPaths) INCLUDE(TorchPathsInit) INCLUDE(TorchPackage) INCLUDE(TorchWrap) INCLUDE(TorchExports) # Torch libraries ADD_SUBDIRECTORY(lib) CONFIGURE_FILE(paths.lua.in "${CMAKE_CURRENT_BINARY_DIR}/paths.lua") INCLUDE_DIRECTORIES(BEFORE "${LUA_INCDIR}") INCLUDE_DIRECTORIES(BEFORE "${CMAKE_CURRENT_SOURCE_DIR}/lib/TH") INCLUDE_DIRECTORIES(BEFORE "${CMAKE_CURRENT_BINARY_DIR}/lib/TH") INCLUDE_DIRECTORIES(BEFORE "${CMAKE_CURRENT_SOURCE_DIR}/lib/luaT") LINK_DIRECTORIES("${LUA_LIBDIR}") SET(src DiskFile.c File.c MemoryFile.c PipeFile.c Storage.c Tensor.c Timer.c utils.c init.c TensorOperator.c TensorMath.c random.c Generator.c) SET(luasrc init.lua File.lua Tensor.lua CmdLine.lua FFInterface.lua Tester.lua TestSuite.lua ${CMAKE_CURRENT_BINARY_DIR}/paths.lua test/test.lua) # Necessary do generate wrapper 
ADD_TORCH_WRAP(tensormathwrap TensorMath.lua) ADD_TORCH_WRAP(randomwrap random.lua) ADD_TORCH_PACKAGE(torch "${src}" "${luasrc}") TARGET_LINK_LIBRARIES(torch luaT TH) IF(LUALIB) TARGET_LINK_LIBRARIES(torch ${LUALIB}) ENDIF() INSTALL(FILES "README.md" DESTINATION "${Torch_INSTALL_LUA_PATH_SUBDIR}/torch") INSTALL(DIRECTORY "doc" DESTINATION "${Torch_INSTALL_LUA_PATH_SUBDIR}/torch") CONTRIBUTING.md000066400000000000000000000116151316246254300133210ustar00rootroot00000000000000# Contributing to Torch7 Core (torch7, nn, cutorch, cunn) Thanks a lot! There are plenty of ways you can help! Please take a moment to review this document in order to make the contribution process easy and effective for everyone involved. Following these guidelines helps to communicate that you respect the time of the developers managing and developing this open source project. In return, they should reciprocate that respect in addressing your issue or assessing patches and features. ## Using the issue tracker The [issue tracker](https://github.com/torch/torch7/issues) is the preferred channel for [bug reports](#bugs), [features requests](#features) and [submitting pull requests](#pull-requests), but please respect the following restrictions: * Please **do not** use the issue tracker for personal support requests (use [mailing-list](https://groups.google.com/forum/#!forum/torch7)). * Please **do not** open issues regarding the code in a torch package outside the core. For example don't open issues about the REPL in the torch7 issue tracker, use the trepl issue tracker for that. ## Bug reports A bug is a _demonstrable problem_ that is caused by the code in the repository. Good bug reports are extremely helpful - thank you! Guidelines for bug reports: 1. **Use the GitHub issue search** — check if the issue has already been reported. 2. **Check if the issue has been fixed** — try to reproduce it using the latest `master` or development branch in the repository. 3. 
**Isolate the problem** — ideally create test case that is within reason, preferably within 100 lines of code. A good bug report shouldn't leave others needing to chase you up for more information. Please try to be as detailed as possible in your report. What is your environment? What steps will reproduce the issue? What OS do you experience the problem? What would you expect to be the outcome? All these details will help people to fix any potential bugs. ## Feature requests Feature requests are welcome to be filed. Torch is community-developed, the maintainers are not exclusive torch developers, so keep that in mind. The purpose of feature requests is for others who are looking to implement a feature are aware of the interest in the feature. ## Pull requests Good pull requests - patches, improvements, new features - are a fantastic help. They should remain focused in scope **and avoid containing unrelated commits.** **Please ask first** before embarking on any significant pull request (e.g. implementing features, refactoring code, porting to a different language), otherwise you risk spending a lot of time working on something that the project's developers might not want to merge into the project. Please adhere to the coding conventions used throughout a project (indentation, accurate comments, etc.) and any other requirements (such as test coverage). Adhering to the following this process is the best way to get your work included in the project: 1. [Fork](https://help.github.com/articles/fork-a-repo) the project, clone your fork, and configure the remotes: ```bash # Clone your fork of the repo into the current directory git clone https://github.com//torch7.git # Navigate to the newly cloned directory cd torch7 # Assign the original repo to a remote called "upstream" git remote add upstream https://github.com/torch/torch7.git ``` 2. If you cloned a while ago, get the latest changes from upstream: ```bash git checkout master git pull upstream master ``` 3. 
Create a new topic branch (off the main project development branch) to contain your feature, change, or fix: ```bash git checkout -b ``` 4. Commit your changes in logical chunks. Please try to adhere to these [git commit message guidelines](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html) . Use Git's [interactive rebase](https://help.github.com/articles/about-git-rebase) feature to tidy up your commits before making them public. This helps us keep the commit history in logical blocks and clean, as torch grows. For example: - If you are adding a new function or a module, keep the module + tests + doc to a single commit unless logically warranted. - If you are fixing a bug, keep the bugfix to a single commit unless logically warranted. 5. Locally merge (or rebase) the upstream development branch into your topic branch: ```bash git pull [--rebase] upstream master ``` 6. Push your topic branch up to your fork: ```bash git push origin ``` 7. [Open a Pull Request](https://help.github.com/articles/using-pull-requests/) with a clear title and description. **IMPORTANT**: By submitting a patch, you agree to allow the project owners to license your work under the terms of the BSD License. COPYRIGHT.txt000066400000000000000000000040011316246254300131700ustar00rootroot00000000000000Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) Copyright (c) 2011-2013 NYU (Clement Farabet) Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute (Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. 
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the names of Deepmind Technologies, NYU, NEC Laboratories America and IDIAP Research Institute nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. CmdLine.lua000066400000000000000000000160571316246254300131130ustar00rootroot00000000000000local CmdLine = torch.class('torch.CmdLine') local function strip(str) return string.match(str, '%-*(.*)') end local function pad(str, sz) return str .. string.rep(' ', sz-#str) end function CmdLine:error(msg) print('') io.stderr:write(msg) print('') self:help() os.exit(1) end function CmdLine:__readArgument__(params, arg, i, nArgument) local argument = self.arguments[nArgument] local value = arg[i] if nArgument > #self.arguments then self:error('invalid argument: ' .. 
value) end if argument.type and type(value) ~= argument.type then self:error('invalid argument type for argument ' .. argument.key .. ' (should be ' .. argument.type .. ')') end params[strip(argument.key)] = value return 1 end function CmdLine:__readOption__(params, arg, i) local key = arg[i] local option = self.options[key] if not option then self:error('unknown option ' .. key) end if option.type and option.type == 'boolean' then params[strip(key)] = not option.default return 1 else local value = arg[i+1] if not value then self:error('missing argument for option ' .. key) end if not option.type or option.type == 'string' then elseif option.type == 'number' then value = tonumber(value) else self:error('unknown required option type ' .. option.type) end if not value then self:error('invalid type for option ' .. key .. ' (should be ' .. option.type .. ')') end params[strip(key)] = value return 2 end end function CmdLine:__init(argseparator_,keyseparator_) self.argseparator = argseparator_ or ',' self.keyseparator = keyseparator_ or '=' self.options = {} self.arguments = {} self.helplines = {} self.dateformat = nil self.silentio = false end function CmdLine:silent() self.silentio = true end function CmdLine:addTime(name, format) format = format or '%Y-%m-%d %H:%M:%S' if type(format) ~= 'string' then error('Argument has to be string') end if name ~= nil then name = '[' .. name .. ']: ' else name = '' end self.dateformat = format .. name end function CmdLine:argument(key, help, _type_) table.insert(self.arguments, {key=key, help=help, type=_type_}) table.insert(self.helplines, self.arguments[#self.arguments]) end function CmdLine:option(key, default, help, _type_) if default == nil then error('option ' .. key .. ' has no default value') end _type_ = _type_ or type(default) if type(default) ~= _type_ then error('option ' .. key .. 
' has wrong default type value') end self.options[key] = {key=key, default=default, help=help, type=_type_} table.insert(self.helplines, self.options[key]) end function CmdLine:default() local params = {} for option,v in pairs(self.options) do params[strip(option)] = v.default end return params end function CmdLine:parse(arg) local i = 1 local params = self:default() local nArgument = 0 while i <= #arg do if arg[i] == '-help' or arg[i] == '-h' or arg[i] == '--help' then self:help(arg) os.exit(0) end if self.options[arg[i]] then i = i + self:__readOption__(params, arg, i) else nArgument = nArgument + 1 i = i + self:__readArgument__(params, arg, i, nArgument) end end if nArgument ~= #self.arguments then self:error('not enough arguments') end return params end function CmdLine:string(prefix, params, ignore) local arguments = {} local options = {} prefix = prefix or '' for k,v in pairs(params) do if ignore[k] then print('-- ignore option ' .. k) elseif self.options['-' .. k] then if v ~= self.options['-' .. k].default or ignore[k] == false then if type(v) == 'boolean' then if v then v = 't' else v = 'f' end end table.insert(options, k .. self.keyseparator .. v) print(k,v,self.options['-' .. k].default) end else local narg for i=1,#self.arguments do if strip(self.arguments[i].key) == k then narg = i end end if narg then arguments[narg] = k .. self.keyseparator .. v else print('WARNING: unknown option/argument: ' .. k .. ' IGNORING for DIRECTORY NAME') end end end table.sort(options) local str = table.concat(arguments, self.argseparator) if str == '' then str = table.concat(options, self.argseparator) else str = str .. self.argseparator .. table.concat(options, self.argseparator) end if str == '' then return prefix else return prefix .. self.argseparator .. 
str end end local oprint = nil function CmdLine:log(file, params) local f = (io.type(file) == 'file' and file) or io.open(file, 'w') oprint = oprint or print -- get the current print function lazily function print(...) local n = select("#", ...) local arg = {...} if not self.silentio then oprint(...) end local str = {} if self.dateformat then table.insert(str, os.date(self.dateformat)) end for i=1,n do table.insert(str,tostring(arg[i])) end table.insert(str,'\n') f:write(table.concat(str,' ')) f:flush() end print('[program started on ' .. os.date() .. ']') print('[command line arguments]') if params then for k,v in pairs(params) do print(k,v) end end print('[----------------------]') end function CmdLine:text(txt) txt = txt or '' assert(type(txt) == 'string') table.insert(self.helplines, txt) end function CmdLine:help(arg) io.write('Usage: ') if arg then io.write(arg[0] .. ' ') end io.write('[options]') for i=1,#self.arguments do io.write(' <' .. strip(self.arguments[i].key) .. '>') end io.write('\n') -- first pass to compute max length local optsz = 0 for _,option in ipairs(self.helplines) do if type(option) == 'table' then if option.default ~= nil then -- it is an option if #option.key > optsz then optsz = #option.key end else -- it is an argument if #strip(option.key)+2 > optsz then optsz = #strip(option.key)+2 end end end end -- second pass to print for _,option in ipairs(self.helplines) do if type(option) == 'table' then io.write(' ') if option.default ~= nil then -- it is an option io.write(pad(option.key, optsz)) if option.help then io.write(' ' .. option.help) end io.write(' [' .. tostring(option.default) .. ']') else -- it is an argument io.write(pad('<' .. strip(option.key) .. '>', optsz)) if option.help then io.write(' ' .. 
option.help) end end else io.write(option) -- just some additional help end io.write('\n') end end DiskFile.c000066400000000000000000000054001316246254300127210ustar00rootroot00000000000000#include "general.h" static int torch_DiskFile_new(lua_State *L) { const char *name = luaL_checkstring(L, 1); const char *mode = luaL_optstring(L, 2, "r"); int isQuiet = luaT_optboolean(L, 3, 0); THFile *self = THDiskFile_new(name, mode, isQuiet); luaT_pushudata(L, self, "torch.DiskFile"); return 1; } static int torch_DiskFile_free(lua_State *L) { THFile *self = luaT_checkudata(L, 1, "torch.DiskFile"); THFile_free(self); return 0; } static int torch_DiskFile_isLittleEndianCPU(lua_State *L) { lua_pushboolean(L, THDiskFile_isLittleEndianCPU()); return 1; } static int torch_DiskFile_isBigEndianCPU(lua_State *L) { lua_pushboolean(L, !THDiskFile_isLittleEndianCPU()); return 1; } static int torch_DiskFile_nativeEndianEncoding(lua_State *L) { THFile *self = luaT_checkudata(L, 1, "torch.DiskFile"); THDiskFile_nativeEndianEncoding(self); lua_settop(L, 1); return 1; } static int torch_DiskFile_littleEndianEncoding(lua_State *L) { THFile *self = luaT_checkudata(L, 1, "torch.DiskFile"); THDiskFile_littleEndianEncoding(self); lua_settop(L, 1); return 1; } static int torch_DiskFile_bigEndianEncoding(lua_State *L) { THFile *self = luaT_checkudata(L, 1, "torch.DiskFile"); THDiskFile_bigEndianEncoding(self); lua_settop(L, 1); return 1; } static int torch_DiskFile_longSize(lua_State *L) { THFile *self = luaT_checkudata(L, 1, "torch.DiskFile"); THDiskFile_longSize(self, lua_tointeger(L, 2)); lua_settop(L, 1); return 1; } static int torch_DiskFile_noBuffer(lua_State *L) { THFile *self = luaT_checkudata(L, 1, "torch.DiskFile"); THDiskFile_noBuffer(self); lua_settop(L, 1); return 1; } static int torch_DiskFile___tostring__(lua_State *L) { THFile *self = luaT_checkudata(L, 1, "torch.DiskFile"); lua_pushfstring(L, "torch.DiskFile on <%s> [status: %s -- mode %c%c]", THDiskFile_name(self), 
(THFile_isOpened(self) ? "open" : "closed"), (THFile_isReadable(self) ? 'r' : ' '), (THFile_isWritable(self) ? 'w' : ' ')); return 1; } static const struct luaL_Reg torch_DiskFile__ [] = { {"isLittleEndianCPU", torch_DiskFile_isLittleEndianCPU}, {"isBigEndianCPU", torch_DiskFile_isBigEndianCPU}, {"nativeEndianEncoding", torch_DiskFile_nativeEndianEncoding}, {"littleEndianEncoding", torch_DiskFile_littleEndianEncoding}, {"bigEndianEncoding", torch_DiskFile_bigEndianEncoding}, {"longSize", torch_DiskFile_longSize}, {"noBuffer", torch_DiskFile_noBuffer}, {"__tostring__", torch_DiskFile___tostring__}, {NULL, NULL} }; void torch_DiskFile_init(lua_State *L) { luaT_newmetatable(L, "torch.DiskFile", "torch.File", torch_DiskFile_new, torch_DiskFile_free, NULL); luaT_setfuncs(L, torch_DiskFile__, 0); lua_pop(L, 1); } FFInterface.lua000066400000000000000000000150631316246254300137100ustar00rootroot00000000000000-- if this causes issues, you may need to: -- luarocks remove --force ffi -- and follow instructions to install -- https://github.com/facebook/luaffifb local ok, ffi = pcall(require, 'ffi') local function checkArgument(condition, fn, ud, msg, level) local level = level or 3 if not condition then error("bad argument #" .. ud .. " to '" .. fn .. "' (" .. msg .. ")", level) end end local function checkArgumentType(expected, actual, fn, ud, level) local level = level or 3 if expected ~= actual then checkArgument(false, fn, ud, expected .. " expected, got " .. 
actual, level + 1) end end if ok then local Real2real = { Byte='unsigned char', Char='char', Short='short', Int='int', Long='long', Float='float', Double='double', Half='THHalf' } -- Allocator ffi.cdef[[ typedef struct THAllocator { void* (*malloc)(void*, ptrdiff_t); void* (*realloc)(void*, void*, ptrdiff_t); void (*free)(void*, void*); } THAllocator; ]] -- Half ffi.cdef[[ typedef struct { unsigned short x; } __THHalf; typedef __THHalf THHalf; ]] -- Storage for Real, real in pairs(Real2real) do local cdefs = [[ typedef struct THRealStorage { real *data; ptrdiff_t size; int refcount; char flag; THAllocator *allocator; void *allocatorContext; } THRealStorage; ]] cdefs = cdefs:gsub('Real', Real):gsub('real', real) ffi.cdef(cdefs) local Storage = torch.getmetatable(string.format('torch.%sStorage', Real)) local Storage_tt = ffi.typeof('TH' .. Real .. 'Storage**') rawset(Storage, "cdata", function(self) return Storage_tt(self)[0] end) rawset(Storage, "data", function(self) return Storage_tt(self)[0].data end) end -- Tensor for Real, real in pairs(Real2real) do local cdefs = [[ typedef struct THRealTensor { long *size; long *stride; int nDimension; THRealStorage *storage; ptrdiff_t storageOffset; int refcount; char flag; } THRealTensor; ]] cdefs = cdefs:gsub('Real', Real):gsub('real', real) ffi.cdef(cdefs) local Tensor_type = string.format('torch.%sTensor', Real) local Tensor = torch.getmetatable(Tensor_type) local Tensor_tt = ffi.typeof('TH' .. Real .. 
'Tensor**') rawset(Tensor, "cdata", function(self) if not self then return nil; end return Tensor_tt(self)[0] end) rawset(Tensor, "data", function(self) if not self then return nil; end self = Tensor_tt(self)[0] return self.storage ~= nil and self.storage.data + self.storageOffset or nil end) -- faster apply (contiguous case) if Tensor_type ~= 'torch.HalfTensor' then local apply = Tensor.apply rawset(Tensor, "apply", function(self, func) if self:isContiguous() and self.data then local self_d = self:data() for i=0,self:nElement()-1 do local res = func(tonumber(self_d[i])) -- tonumber() required for long... if res then self_d[i] = res end end return self else return apply(self, func) end end) -- faster map (contiguous case) local map = Tensor.map rawset(Tensor, "map", function(self, src, func) checkArgument(torch.isTensor(src), "map", 1, "tensor expected") checkArgumentType(self:type(), src:type(), "map", 1) if self:isContiguous() and src:isContiguous() and self.data and src.data then local self_d = self:data() local src_d = src:data() assert(src:nElement() == self:nElement(), 'size mismatch') for i=0,self:nElement()-1 do local res = func(tonumber(self_d[i]), tonumber(src_d[i])) -- tonumber() required for long... 
if res then self_d[i] = res end end return self else return map(self, src, func) end end) -- faster map2 (contiguous case) local map2 = Tensor.map2 rawset(Tensor, "map2", function(self, src1, src2, func) checkArgument(torch.isTensor(src1), "map", 1, "tensor expected") checkArgument(torch.isTensor(src2), "map", 2, "tensor expected") checkArgumentType(self:type(), src1:type(), "map", 1) checkArgumentType(self:type(), src2:type(), "map", 2) if self:isContiguous() and src1:isContiguous() and src2:isContiguous() and self.data and src1.data and src2.data then local self_d = self:data() local src1_d = src1:data() local src2_d = src2:data() assert(src1:nElement() == self:nElement(), 'size mismatch') assert(src2:nElement() == self:nElement(), 'size mismatch') for i=0,self:nElement()-1 do local res = func(tonumber(self_d[i]), tonumber(src1_d[i]), tonumber(src2_d[i])) -- tonumber() required for long... if res then self_d[i] = res end end return self else return map2(self, src1, src2, func) end end) end end -- torch.data -- will fail if :data() is not defined function torch.data(self, asnumber) if not self then return nil; end local data = self:data() if asnumber then return ffi.cast('intptr_t', data) else return data end end -- torch.cdata -- will fail if :cdata() is not defined function torch.cdata(self, asnumber) if not self then return nil; end local cdata = self:cdata() if asnumber then return ffi.cast('intptr_t', cdata) else return cdata end end end File.c000066400000000000000000000212371316246254300121140ustar00rootroot00000000000000#include "general.h" #include "THFile.h" #include "luaT.h" #define IMPLEMENT_TORCH_FILE_FLAG(NAME) \ static int torch_File_##NAME(lua_State *L) \ { \ THFile *self = luaT_checkudata(L, 1, "torch.File"); \ lua_pushboolean(L, THFile_##NAME(self)); \ return 1; \ } IMPLEMENT_TORCH_FILE_FLAG(isQuiet) IMPLEMENT_TORCH_FILE_FLAG(isReadable) IMPLEMENT_TORCH_FILE_FLAG(isWritable) IMPLEMENT_TORCH_FILE_FLAG(isBinary) 
IMPLEMENT_TORCH_FILE_FLAG(isAutoSpacing) IMPLEMENT_TORCH_FILE_FLAG(hasError) #define IMPLEMENT_TORCH_FILE_FUNC(NAME) \ static int torch_File_##NAME(lua_State *L) \ { \ THFile *self = luaT_checkudata(L, 1, "torch.File"); \ THFile_##NAME(self); \ lua_settop(L, 1); \ return 1; \ } IMPLEMENT_TORCH_FILE_FUNC(binary) IMPLEMENT_TORCH_FILE_FUNC(ascii) IMPLEMENT_TORCH_FILE_FUNC(autoSpacing) IMPLEMENT_TORCH_FILE_FUNC(noAutoSpacing) IMPLEMENT_TORCH_FILE_FUNC(quiet) IMPLEMENT_TORCH_FILE_FUNC(pedantic) IMPLEMENT_TORCH_FILE_FUNC(clearError) IMPLEMENT_TORCH_FILE_FUNC(synchronize) static int torch_File_seek(lua_State *L) { THFile *self = luaT_checkudata(L, 1, "torch.File"); ptrdiff_t position = luaL_checkinteger(L, 2)-1; // >= 0 because it has 1 already subtracted THArgCheck(position >= 0, 2, "position has to be greater than 0!"); THFile_seek(self, (size_t)position); lua_settop(L, 1); return 1; } IMPLEMENT_TORCH_FILE_FUNC(seekEnd) static int torch_File_position(lua_State *L) { THFile *self = luaT_checkudata(L, 1, "torch.File"); lua_pushnumber(L, THFile_position(self)+1); return 1; } IMPLEMENT_TORCH_FILE_FUNC(close) #define IMPLEMENT_TORCH_FILE_RW(TYPEC, TYPE) \ static int torch_File_read##TYPEC(lua_State *L) \ { \ THFile *self = luaT_checkudata(L, 1, "torch.File"); \ int narg = lua_gettop(L); \ \ if(narg == 1) \ { \ lua_pushnumber(L, THFile_read##TYPEC##Scalar(self)); \ return 1; \ } \ else if(narg == 2) \ { \ if(lua_isnumber(L, 2)) \ { \ ptrdiff_t size = lua_tonumber(L, 2); \ ptrdiff_t nread; \ \ TH##TYPEC##Storage *storage = TH##TYPEC##Storage_newWithSize(size); \ luaT_pushudata(L, storage, "torch." #TYPEC "Storage"); \ nread = THFile_read##TYPEC(self, storage); \ if(nread != size) \ TH##TYPEC##Storage_resize(storage, nread); \ return 1; \ } \ else if(luaT_toudata(L, 2, "torch." #TYPEC "Storage")) \ { \ TH##TYPEC##Storage *storage = luaT_toudata(L, 2, "torch." 
#TYPEC "Storage"); \ lua_pushnumber(L, THFile_read##TYPEC(self, storage)); \ return 1; \ } \ } \ \ luaL_error(L, "nothing, number, or " #TYPEC "Storage expected"); \ return 0; \ } \ \ static int torch_File_write##TYPEC(lua_State *L) \ { \ THFile *self = luaT_checkudata(L, 1, "torch.File"); \ int narg = lua_gettop(L); \ \ if(narg == 2) \ { \ if(lua_isnumber(L, 2)) \ { \ TYPE value = lua_tonumber(L, 2); \ THFile_write##TYPEC##Scalar(self, (TYPE)value); \ return 0; \ } \ else if(luaT_toudata(L, 2, "torch." #TYPEC "Storage")) \ { \ TH##TYPEC##Storage *storage = luaT_toudata(L, 2, "torch." #TYPEC "Storage"); \ lua_pushnumber(L, THFile_write##TYPEC(self, storage)); \ return 1; \ } \ } \ \ luaL_error(L, "number, or " #TYPEC "Storage expected"); \ return 0; \ } IMPLEMENT_TORCH_FILE_RW(Byte, unsigned char) IMPLEMENT_TORCH_FILE_RW(Char, char) IMPLEMENT_TORCH_FILE_RW(Short, short) IMPLEMENT_TORCH_FILE_RW(Int, int) IMPLEMENT_TORCH_FILE_RW(Long, long) IMPLEMENT_TORCH_FILE_RW(Float, float) IMPLEMENT_TORCH_FILE_RW(Double, double) static int torch_File_readString(lua_State *L) { THFile *self = luaT_checkudata(L, 1, "torch.File"); const char *format = luaL_checkstring(L, 2); char *str; ptrdiff_t size; size = THFile_readStringRaw(self, format, &str); lua_pushlstring(L, str, size); THFree(str); return 1; } static int torch_File_writeString(lua_State *L) { THFile *self = luaT_checkudata(L, 1, "torch.File"); const char *str = NULL; size_t size; luaL_checktype(L, 2, LUA_TSTRING); str = lua_tolstring(L, 2, &size); lua_pushnumber(L, THFile_writeStringRaw(self, str, size)); return 1; } static const struct luaL_Reg torch_File__ [] = { {"isQuiet", torch_File_isQuiet}, {"isReadable", torch_File_isReadable}, {"isWritable", torch_File_isWritable}, {"isBinary", torch_File_isBinary}, {"isAutoSpacing", torch_File_isAutoSpacing}, {"hasError", torch_File_hasError}, {"binary", torch_File_binary}, {"ascii", torch_File_ascii}, {"autoSpacing", torch_File_autoSpacing}, {"noAutoSpacing", 
torch_File_noAutoSpacing}, {"quiet", torch_File_quiet}, {"pedantic", torch_File_pedantic}, {"clearError", torch_File_clearError}, /* DEBUG: CHECK DISK FREE & READ/WRITE STRING*/ {"readByte", torch_File_readByte}, {"readChar", torch_File_readChar}, {"readShort", torch_File_readShort}, {"readInt", torch_File_readInt}, {"readLong", torch_File_readLong}, {"readFloat", torch_File_readFloat}, {"readDouble", torch_File_readDouble}, {"readString", torch_File_readString}, {"writeByte", torch_File_writeByte}, {"writeChar", torch_File_writeChar}, {"writeShort", torch_File_writeShort}, {"writeInt", torch_File_writeInt}, {"writeLong", torch_File_writeLong}, {"writeFloat", torch_File_writeFloat}, {"writeDouble", torch_File_writeDouble}, {"writeString", torch_File_writeString}, {"synchronize", torch_File_synchronize}, {"seek", torch_File_seek}, {"seekEnd", torch_File_seekEnd}, {"position", torch_File_position}, {"close", torch_File_close}, {NULL, NULL} }; void torch_File_init(lua_State *L) { luaT_newmetatable(L, "torch.File", NULL, NULL, NULL, NULL); luaT_setfuncs(L, torch_File__, 0); lua_pop(L, 1); } File.lua000066400000000000000000000372451316246254300124610ustar00rootroot00000000000000local File = torch.getmetatable('torch.File') function File:writeBool(value) if value then self:writeInt(1) else self:writeInt(0) end end function File:readBool() return (self:readInt() == 1) end local TYPE_NIL = 0 local TYPE_NUMBER = 1 local TYPE_STRING = 2 local TYPE_TABLE = 3 local TYPE_TORCH = 4 local TYPE_BOOLEAN = 5 local TYPE_FUNCTION = 6 local TYPE_RECUR_FUNCTION = 8 local LEGACY_TYPE_RECUR_FUNCTION = 7 -- Lua 5.2 compatibility local loadstring = loadstring or load function File:isWritableObject(object) local typename = type(object) local typeidx if type(object) ~= 'boolean' and not object then typeidx = TYPE_NIL elseif torch.typename(object) and torch.factory(torch.typename(object)) then typeidx = TYPE_TORCH elseif typename == 'table' then typeidx = TYPE_TABLE elseif typename == 'number' 
then typeidx = TYPE_NUMBER elseif typename == 'string' then typeidx = TYPE_STRING elseif typename == 'boolean' then typeidx = TYPE_BOOLEAN elseif typename == 'function' and pcall(string.dump, object) then typeidx = TYPE_RECUR_FUNCTION end return typeidx end function File:referenced(ref) -- we use an environment to keep a record of written objects if not torch.getenv(self).writeObjects then torch.setenv(self, { writeObjects={}, writeObjectsRef={}, readObjects={}, objectNameStack={}, upvalueRefToId={}, upvalueIdToClosure={}, }) end local env = torch.getenv(self) env.force = not ref torch.setenv(self,env) return self end function File:isReferenced() -- if no environment, then no forcing setup yet if not torch.getenv(self).writeObjects then return true end local env = torch.getenv(self) return not env.force end local function getmetamethod(obj, name) local func local status -- check getmetatable(obj).__name or -- check getmetatable(obj).name status, func = pcall( function() -- note that sometimes the metatable is hidden -- we get it for sure through the torch type system local mt = torch.getmetatable(torch.typename(obj)) if mt then return mt['__' .. 
name] or mt[name] end end ) if status and type(func) == 'function' then return func end end local UPVALUES_TOKEN = {} -- unique object local function formatStack(objectNameStack) -- Format object name stack skipping UPVALUES_TOKEN and upvalue index local parts = {} for i, v in ipairs(objectNameStack) do if v ~= UPVALUES_TOKEN and objectNameStack[i-1] ~= UPVALUES_TOKEN then table.insert(parts, v) end end return table.concat(parts, '.') end function File:writeObject(object, debugname, hook) -- define a default hook function if not provided hook = hook or function(object) return object end -- we use an environment to keep a record of written objects if not torch.getenv(self).writeObjects then torch.setenv(self, { writeObjects={}, writeObjectsRef={}, readObjects={}, objectNameStack={}, upvalueRefToId={}, upvalueIdToClosure={}, }) end -- That guy is used for references' book-keeping local sobject = object -- That guy is the object that is actually persisted -- hook(object) can be used to modify the object before writing it to the file. -- Useful for serializing objects under a config -- that we want to deserialize safely under another config. -- (e.g. Cuda to Float tensors, cudnn to nn, ...) 
object = hook(object) local force = torch.getenv(self).force -- if nil object, only write the type and return if type(object) ~= 'boolean' and not object then self:writeInt(TYPE_NIL) return end local objectNameStack = torch.getenv(self).objectNameStack table.insert(objectNameStack, debugname or '') -- check the type we are dealing with local typeidx = self:isWritableObject(object) if not typeidx then error(string.format('Unwritable object <%s> at %s', type(object), formatStack(objectNameStack))) end self:writeInt(typeidx) if typeidx == TYPE_NUMBER then self:writeDouble(object) elseif typeidx == TYPE_BOOLEAN then self:writeBool(object) elseif typeidx == TYPE_STRING then local stringStorage = torch.CharStorage():string(object) self:writeInt(#stringStorage) self:writeChar(stringStorage) elseif typeidx == TYPE_TORCH or typeidx == TYPE_TABLE or typeidx == TYPE_RECUR_FUNCTION then -- check it exists already (we look at the pointer!) local objects = torch.getenv(self).writeObjects local objectsRef = torch.getenv(self).writeObjectsRef local index = objects[torch.pointer(sobject)] if index and (not force) then -- if already exists, write only its index self:writeInt(index) else -- else write the object itself index = objects.nWriteObject or 0 index = index + 1 if not force then objects[torch.pointer(sobject)] = index objectsRef[object] = index -- we make sure the object is not going to disappear end self:writeInt(index) objects.nWriteObject = index if typeidx == TYPE_RECUR_FUNCTION then local upvalueRefToId = torch.getenv(self).upvalueRefToId -- Unique ID for each ref since lightuserdata are not serializable local nextId = 1 for _ in pairs(upvalueRefToId) do nextId=nextId+1 end local upvalues = {} local counter = 0 while true do counter = counter + 1 local name,value = debug.getupvalue(object, counter) if not name then break end if name == '_ENV' then value = nil end local id=nil -- debug.upvalueid exists only for lua>=5.2 and luajit if debug.upvalueid then local upvalueRef 
= debug.upvalueid(object, counter) if not upvalueRefToId[upvalueRef] then upvalueRefToId[upvalueRef] = nextId nextId = nextId + 1 end id = upvalueRefToId[upvalueRef] end table.insert(upvalues, {name=name, id=id, value=value}) end local dumped = string.dump(object) local stringStorage = torch.CharStorage():string(dumped) self:writeInt(#stringStorage) self:writeChar(stringStorage) self:writeObject(upvalues, UPVALUES_TOKEN, hook) elseif typeidx == TYPE_TORCH then local version = torch.CharStorage():string('V ' .. torch.version(object)) local className = torch.CharStorage():string(torch.typename(object)) self:writeInt(#version) self:writeChar(version) self:writeInt(#className) self:writeChar(className) local write = getmetamethod(object, 'write') if write then write(object, self) elseif type(object) == 'table' then local var = {} for k,v in pairs(object) do if self:isWritableObject(v) then var[k] = v else print(string.format('$ Warning: cannot write object field <%s> of <%s> %s', k, torch.typename(object), formatStack(objectNameStack))) end end self:writeObject(var, torch.typename(object), hook) else error(string.format('<%s> is a non-serializable Torch object %s', torch.typename(object), formatStack(objectNameStack))) end else -- it is a table local size = 0; for k,v in pairs(object) do size = size + 1 end self:writeInt(size) for k,v in pairs(object) do self:writeObject(k, nil, hook) local name = (type(k) == 'string' or type(k) == 'number') and tostring(k) or nil -- special case name for upvalues if objectNameStack[#objectNameStack-1] == UPVALUES_TOKEN and name == 'value' and type(object.name) == 'string' then name = object.name end self:writeObject(v, name, hook) end end end else error('Unwritable object') end table.remove(objectNameStack) end function File:readObject() -- we use an environment to keep a record of read objects if not torch.getenv(self).writeObjects then torch.setenv(self, { writeObjects={}, writeObjectsRef={}, readObjects={}, objectNameStack={}, 
upvalueRefToId={}, upvalueIdToClosure={}, }) end local force = torch.getenv(self).force -- read the typeidx local typeidx = self:readInt() -- is it nil? if typeidx == TYPE_NIL then return nil end if typeidx == TYPE_NUMBER then return self:readDouble() elseif typeidx == TYPE_BOOLEAN then return self:readBool() elseif typeidx == TYPE_STRING then local size = self:readInt() return self:readChar(size):string() elseif typeidx == TYPE_FUNCTION then local size = self:readInt() local dumped = self:readChar(size):string() local func, err = loadstring(dumped) if not func then io.stderr:write(string.format('Warning: Failed to load function from bytecode: %s', err)) end local upvalues = self:readObject() for index,upvalue in ipairs(upvalues) do debug.setupvalue(func, index, upvalue) end return func elseif typeidx == TYPE_TABLE or typeidx == TYPE_TORCH or typeidx == TYPE_RECUR_FUNCTION or typeidx == LEGACY_TYPE_RECUR_FUNCTION then -- read the index local index = self:readInt() -- check it is loaded already local objects = torch.getenv(self).readObjects if objects[index] and not force then return objects[index] end -- otherwise read it if typeidx == TYPE_RECUR_FUNCTION or typeidx == LEGACY_TYPE_RECUR_FUNCTION then local size = self:readInt() local dumped = self:readChar(size):string() local func, err = loadstring(dumped) if not func then io.stderr:write(string.format('Warning: Failed to load function from bytecode: %s', err)) end if not force then objects[index] = func end local upvalueIdToClosure = torch.getenv(self).upvalueIdToClosure local upvalues = self:readObject() for index,upvalue in ipairs(upvalues) do if typeidx == LEGACY_TYPE_RECUR_FUNCTION then debug.setupvalue(func, index, upvalue) elseif upvalue.name == '_ENV' then debug.setupvalue(func, index, _ENV) else debug.setupvalue(func, index, upvalue.value) -- debug.upvaluejoin exists only for lua>=5.2 and luajit if debug.upvaluejoin and upvalue.id then if upvalueIdToClosure[upvalue.id] then -- This upvalue is linked to 
another one local otherClosure = upvalueIdToClosure[upvalue.id] debug.upvaluejoin(func, index, otherClosure.func, otherClosure.index) else -- Save this closure for next time upvalueIdToClosure[upvalue.id] = { func = func, index = index, } end end end end return func elseif typeidx == TYPE_TORCH then local version, className, versionNumber version = self:readChar(self:readInt()):string() versionNumber = tonumber(string.match(version, '^V (.*)$')) if not versionNumber then className = version versionNumber = 0 -- file created before existence of versioning system else className = self:readChar(self:readInt()):string() end if not torch.factory(className) then error(string.format('unknown Torch class <%s>', tostring(className))) end local object = torch.factory(className)(self) if not force then objects[index] = object end local read = getmetamethod(object, 'read') if read then read(object, self, versionNumber) elseif type(object) == 'table' then local var = self:readObject() for k,v in pairs(var) do object[k] = v end else error(string.format('Cannot load object class <%s>', tostring(className))) end return object else -- it is a table local size = self:readInt() local object = {} if not force then objects[index] = object end for i = 1,size do local k = self:readObject() local v = self:readObject() object[k] = v end return object end else error('unknown object') end end -- simple helpers to save/load arbitrary objects/tables function torch.save(filename, object, mode, referenced) assert(mode == nil or mode == 'binary' or mode == 'ascii', '"binary" or "ascii" (or nil) expected for mode') assert(referenced == nil or referenced == true or referenced == false, 'true or false (or nil) expected for referenced') mode = mode or 'binary' referenced = referenced == nil and true or referenced local file = torch.DiskFile(filename, 'w') file[mode](file) file:referenced(referenced) file:writeObject(object) file:close() end function torch.load(filename, mode, referenced) assert(mode 
== 'binary' or mode == 'b32' or mode == 'b64' or mode == nil or mode == 'ascii', '"binary", "b32", "b64" or "ascii" (or nil) expected for mode') assert(referenced == nil or referenced == true or referenced == false, 'true or false (or nil) expected for referenced') local longSize if mode == 'b32' or mode == 'b64' then longSize = tonumber(mode:match('%d+')) / 8 mode = 'binary' end mode = mode or 'binary' referenced = referenced == nil and true or referenced local file = torch.DiskFile(filename, 'r') file[mode](file) file:referenced(referenced) if longSize then file:longSize(longSize) end local object = file:readObject() file:close() return object end -- simple helpers to serialize/deserialize arbitrary objects/tables function torch.serialize(object, mode) local storage = torch.serializeToStorage(object, mode) return storage:string() end -- Serialize to a CharStorage, not a lua string. This avoids function torch.serializeToStorage(object, mode) mode = mode or 'binary' local f = torch.MemoryFile() f = f[mode](f) f:writeObject(object) local storage = f:storage() -- the storage includes an extra NULL character: get rid of it storage:resize(storage:size()-1) f:close() return storage end function torch.deserializeFromStorage(storage, mode) mode = mode or 'binary' local tx = torch.CharTensor(storage) local xp = torch.CharStorage(tx:size(1)+1) local txp = torch.CharTensor(xp) txp:narrow(1,1,tx:size(1)):copy(tx) txp[tx:size(1)+1] = 0 local f = torch.MemoryFile(xp) f = f[mode](f) local object = f:readObject() f:close() return object end function torch.deserialize(str, mode) local storage = torch.CharStorage():string(str) return torch.deserializeFromStorage(storage, mode) end -- public API (saveobj/loadobj are safe for global import) torch.saveobj = torch.save torch.loadobj = torch.load Generator.c000066400000000000000000000023401316246254300131550ustar00rootroot00000000000000#include int torch_Generator_new(lua_State *L) { THGenerator *gen = THGenerator_new(); 
luaT_pushudata(L, gen, torch_Generator); return 1; } int torch_Generator_free(lua_State *L) { THGenerator *gen= luaT_checkudata(L, 1, torch_Generator); THGenerator_free(gen); return 0; } static int torch_Generator_write(lua_State *L) { THGenerator *gen = luaT_checkudata(L, 1, torch_Generator); THFile *file = luaT_checkudata(L, 2, "torch.File"); THFile_writeByteRaw(file, (unsigned char *)gen, sizeof(THGenerator)); return 0; } static int torch_Generator_read(lua_State *L) { THGenerator *gen = luaT_checkudata(L, 1, torch_Generator); THFile *file = luaT_checkudata(L, 2, "torch.File"); THFile_readByteRaw(file, (unsigned char *)gen, sizeof(THGenerator)); return 0; } static const struct luaL_Reg torch_Generator_table_ [] = { {"write", torch_Generator_write}, {"read", torch_Generator_read}, {NULL, NULL} }; #define torch_Generator_factory torch_Generator_new void torch_Generator_init(lua_State *L) { luaT_newmetatable(L, torch_Generator, NULL, torch_Generator_new, torch_Generator_free, torch_Generator_factory); luaT_setfuncs(L, torch_Generator_table_, 0); lua_pop(L, 1); } MemoryFile.c000066400000000000000000000034541316246254300133060ustar00rootroot00000000000000#include "general.h" static int torch_MemoryFile_new(lua_State *L) { const char *mode; THCharStorage *storage = luaT_toudata(L, 1, "torch.CharStorage"); THFile *self; if(storage) { mode = luaL_optstring(L, 2, "rw"); self = THMemoryFile_newWithStorage(storage, mode); } else { mode = luaL_optstring(L, 1, "rw"); self = THMemoryFile_new(mode); } luaT_pushudata(L, self, "torch.MemoryFile"); return 1; } static int torch_MemoryFile_storage(lua_State *L) { THFile *self = luaT_checkudata(L, 1, "torch.MemoryFile"); THCharStorage_retain(THMemoryFile_storage(self)); luaT_pushudata(L, THMemoryFile_storage(self), "torch.CharStorage"); return 1; } static int torch_longSize(lua_State *L) { THFile *self = luaT_checkudata(L, 1, "torch.MemoryFile"); THMemoryFile_longSize(self, lua_tointeger(L, 2)); lua_settop(L, 1); return 1; } static 
int torch_MemoryFile_free(lua_State *L) { THFile *self = luaT_checkudata(L, 1, "torch.MemoryFile"); THFile_free(self); return 0; } static int torch_MemoryFile___tostring__(lua_State *L) { THFile *self = luaT_checkudata(L, 1, "torch.MemoryFile"); lua_pushfstring(L, "torch.MemoryFile [status: %s -- mode: %c%c]", (THFile_isOpened(self) ? "open" : "closed"), (THFile_isReadable(self) ? 'r' : ' '), (THFile_isWritable(self) ? 'w' : ' ')); return 1; } static const struct luaL_Reg torch_MemoryFile__ [] = { {"storage", torch_MemoryFile_storage}, {"longSize", torch_longSize}, {"__tostring__", torch_MemoryFile___tostring__}, {NULL, NULL} }; void torch_MemoryFile_init(lua_State *L) { luaT_newmetatable(L, "torch.MemoryFile", "torch.File", torch_MemoryFile_new, torch_MemoryFile_free, NULL); luaT_setfuncs(L, torch_MemoryFile__, 0); lua_pop(L, 1); } PipeFile.c000066400000000000000000000023151316246254300127260ustar00rootroot00000000000000#include "general.h" static int torch_PipeFile_new(lua_State *L) { const char *name = luaL_checkstring(L, 1); const char *mode = luaL_optstring(L, 2, "r"); int isQuiet = luaT_optboolean(L, 3, 0); THFile *self = THPipeFile_new(name, mode, isQuiet); luaT_pushudata(L, self, "torch.PipeFile"); return 1; } static int torch_PipeFile_free(lua_State *L) { THFile *self = luaT_checkudata(L, 1, "torch.PipeFile"); THFile_free(self); return 0; } static int torch_PipeFile___tostring__(lua_State *L) { THFile *self = luaT_checkudata(L, 1, "torch.PipeFile"); lua_pushfstring(L, "torch.PipeFile on <%s> [status: %s -- mode: %c%c]", THDiskFile_name(self), (THFile_isOpened(self) ? "open" : "closed"), (THFile_isReadable(self) ? 'r' : ' '), (THFile_isWritable(self) ? 
'w' : ' ')); return 1; } static const struct luaL_Reg torch_PipeFile__ [] = { {"__tostring__", torch_PipeFile___tostring__}, {NULL, NULL} }; void torch_PipeFile_init(lua_State *L) { luaT_newmetatable(L, "torch.PipeFile", "torch.DiskFile", torch_PipeFile_new, torch_PipeFile_free, NULL); luaT_setfuncs(L, torch_PipeFile__, 0); lua_pop(L, 1); } README.md000066400000000000000000000054161316246254300123510ustar00rootroot00000000000000[![Join the chat at https://gitter.im/torch/torch7](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/torch/torch7?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Build Status](https://travis-ci.org/torch/torch7.svg)](https://travis-ci.org/torch/torch7) ## Need help? ## * Questions, Support, Install issues: [Google groups](https://groups.google.com/forum/#!forum/torch7) * Reporting bugs: [torch7](https://github.com/torch/torch7/issues) [nn](https://github.com/torch/nn/issues) [cutorch](https://github.com/torch/cutorch/issues) [cunn](https://github.com/torch/cutorch/issues) [optim](https://github.com/torch/optim/issues) [threads](https://github.com/torch/threads/issues) * Hanging out with other developers and users (strictly no install issues, no large blobs of text): [Gitter Chat](https://gitter.im/torch/torch7) # Torch Package Reference Manual # __Torch__ is the main package in [Torch7](http://torch.ch) where data structures for multi-dimensional tensors and mathematical operations over these are defined. Additionally, it provides many utilities for accessing files, serializing objects of arbitrary types and other useful utilities. ## Torch Packages ## * Tensor Library * [Tensor](doc/tensor.md) defines the _all powerful_ tensor object that provides multi-dimensional numerical arrays with type templating. * [Mathematical operations](doc/maths.md) that are defined for the tensor object types. 
* [Storage](doc/storage.md) defines a simple storage interface that controls the underlying storage for any tensor object. * File I/O Interface Library * [File](doc/file.md) is an abstract interface for common file operations. * [Disk File](doc/diskfile.md) defines operations on files stored on disk. * [Memory File](doc/memoryfile.md) defines operations on stored in RAM. * [Pipe File](doc/pipefile.md) defines operations for using piped commands. * [High-Level File operations](doc/serialization.md) defines higher-level serialization functions. * Useful Utilities * [Timer](doc/timer.md) provides functionality for _measuring time_. * [Tester](doc/tester.md) is a generic tester framework. * [CmdLine](doc/cmdline.md) is a command line argument parsing utility. * [Random](doc/random.md) defines a random number generator package with various distributions. * Finally useful [utility](doc/utility.md) functions are provided for easy handling of torch tensor types and class inheritance. ## Useful Links ## * [Community packages](https://github.com/torch/torch7/wiki/Cheatsheet) * [Torch Blog](http://torch.ch/blog/) * [Torch Slides](https://github.com/soumith/cvpr2015/blob/master/cvpr-torch.pdf) ROADMAP.md000066400000000000000000000243461316246254300125020ustar00rootroot00000000000000 # Torch Roadmap (August 2015 - March 2016) This roadmap document is intended to serve as a loose plan of our vision for Torch in the short term. It is open to community feedback and contribution and only intends to serve as an initial draft. After community feedback, we shall freeze it and work on it. The roadmap focuses on five separate things - Core development: improving the core technically. Design changes, code refactors, performance, they go here. - Documentation and Accessibility: Outlining the changes in documentation, and improving general user and developer documentation in various ways. 
- Versioning and Packaging: Planned and much needed changes to the packaging of Torch are discussed here. - Continuous Build Infrastructure: Making our continuous builds more robust, introducing CUDA and OpenCL contbuilds etc. - Other improvements ## Torch Core Project Development - New class system: - **[definite]** with no global side-effects (i.e. the class constructor should be scoped into its parent package) Get rid of every statement/system that has a global effect on the environment (torch.setdefaultensortype => dangerous and not clean) - **[needs discussion]** fully serializable (i.e. when deserializing/reloading a model, there shouldn't be a need to load libraries that defined the class originally, like nn; the class definition should be serialized as well: this would remove a lot of backward compatibility hacks that we have to add to class definitions currently - **koray**: I like this, but wouldn't it break backward compatibility? Currently, whatever we serialize, it is just the data and implementation is defined at load time, so if a bug is fixed (or introduced) you use that. And it starts being ambiguous, what if I load a layer from file and create a new one and their implementation is inconsistent...) - **[definite]** Get rid of non-tensor-related stuff (like serialization) in TH, and move it to lua side - **[needs discussion]** OpenMP: Should it stay or go? Is Threads sufficient? - **Ronan**: I really wonder about this guy, especially now that I have been using threads intensively. I am not sure that fine-grine threading is necessary. - **koray**: I guess you mean with threading, there is no need for OpenMP, but I disagree. Our convolution layer will use multiple threads and then if we run a ReLu over a huge state space, it would become embarrassingly slow. We shouldn't expect everyone to run their experiments in a threading framework. It is more work than necessary sometimes.) - **[needs discussion]** Templated C++ in TH Core? 
- **Ronan**: Should I cleanup TH core? In the end, I am scared to move to C++, but some iterators based taking a closure could be nice (I have some of those that I could add easily). I could move to C++ if it was only template + keeping pointers (and not C++11/14/17, because that would limit the number of users that it can reach because of the latest compilers needed etc.). - **[definite]** Migrate to a single, better/modern testing support - **koray**: like some aspects of Totem, but should be in core Tester - **[definite]** Benchmarking support in Tester - **[definite]** Consistent testing scripts across all core projects - **[definite]** 'nn' container unified interface between containers and graph - **[mostly definite]** Switch to batch only assumption in 'nn'. Right now, the code is unnecessarily complicated for stochastic/batch confusion, we needed extra functions like nInputDims and such. - **[needs discussion]** Support named arguments in the constructor for all 'nn' layers. - **[definite]** 'rnn' package. - **Soumith**: Nicholas Leonard's seems to be a good one. - **[mostly definite]** argcheck for all core functions in torch. Get rid of cwrap's ugliness. - **[definite]** improve paths to support more file system operations - **Clement**: could lfs and penlight be made more standard? penlight is a heavy package but provides so much utility - **Soumith**: I think penlight is lightweight and provides strong utility, definitely consider dependence. - **[definite]** JIT/Lua/FFI/GC: - **koray**: I think Torch should be agnostic to whatever is the backend; - **clement**: yes! - at this point, we need to have all core packages use the regular Lua api (almost the case) - **Ronan**: agreed. - **[definite]** plan to have standalone FFI? 
- Facebook releases their puc LUA based FFI package mostly improved by Sam Gross - [needs discussion] **Ronan** improves it a bit more to use Leon's C99 parser - **Koray**: I am not opposed to Leon's C99 parser, but we should not have the QT like situation where it relies mostly on Leon to maintain it. And, still we need to have FFI since there are people and packages that rely on it now. - **[definite]** Lua 5.2 migration (I think it's already finished ;) ). - **[mostly definite]** Lua 5.3 migration - **[mostly definite]** Optionally replace GC by Ref-counting (existing version in luajit-rocks; but completely broken but will need to be fixed) - **[needs discussion]** Make OpenCL support more visible under torch/opencl (**Soumith**: Hugh Perkins will maintain it of course ;) ). - **[definite]** Split nn into THNN and nn. THNN would be NN package using TH as backend and nn would be the lua layer. THNN can be used as a standalone C library. Same for cunn - **[Definite]** CUDA typed tensor support - CudaHalfTensor CudaDoubleTensor etc. - **[Definite]** better plotting support - **[needs discussion]** UI package that doesn't suck? - **Ronan**: something based on cairo? - **clement**: not sure if this would have much adoption - **Ronan**: yes, it is a worry. I started to do some fancy stuff there, it is not that hard. However, I would need quite some time to polish it. I think having something fully customizable from lua really makes a difference (rather than something like Qt, for example). - something based on a web client? - **clement**: i like the idea of itorch but could never easily build it, build process is too big. - **Ronan**: I cannot use something which forces me to use global variables. - **koray**: I think at the end of the day, we need to have both a GUI client and a web based client. My main problem with web based clients is that I can't easily create custom displays to play an animation or such. 
It is an offline process that I need to generate a movie and then load it in. This and similar things make it hard to use for me. Also, I agree, I actually could not install iTorch on my laptop before cvpr tutorial somehow, it did not want to work :). - **soumith**: I think we should propose a common display API that any interface can implement, that way the users don't need to change scripts across different UI backends. Also, szym/display is a good candidate for the Web UI, ITorch is indeed a bit of a pain to install. - Should we endorse iTorch for everyone to use? - **Ronan**: I know **Soumith** likes it, but I am not a big fan. - Heavy+encourages the use of global variables. Excellent for tutorials, though. - This ties to the first question in **Other Questions** section. - Can we/community do pull requests on iTorch? ( **Soumith**: Yes ) - First step would be to leanify dependencies and/or install procedure (**Soumith**: agreed) - **[needs discussion]** How about Penlight? It has many crucial things that people use. Should we endorse it, use some things from it? Replicate some things in penlight in torch? - **clement**: upvoting this! we use it extensively. - **Ronan**: I live better with less abstractions, but I can be convinced there. However, I find penlight quite big. There are things like the classes that I do not like as well (because of the way they chose for creating classes). - **[needs discussion]** how about Moses? New lean functional package that's pretty useful - **[definite]** A style guide - Guidelines are super important: - for Lua: at least impose strict camel case + 3 spaces (no tab) - for C: camel case + use of underscore to represent namespace scoping + 2 spaces ## Documentation + Accessibility - Tutorials: provide guidelines and basic framework/standard to write and publish tutorials? - Universal dataset API - Dataset classes for several popular datasets - high performance, thread support etc. 
- support CPU and GPU - Model Zoo + Training scripts, with training scripts we can highlight Torch's strengths - How do we build a super friendly model zoo? git repo of pre-trained models? - Better documentation support, have a doc server - Documentation for TH/THC interface and design - Inline documentation parser - doc/shell integration (maybe this is still working but needs redoing?) ## Versioning + Packaging - Package owners need to start releasing frequent versions (i.e. torch v7.0.1, 7.0.2, ...) - scm packages should become deprecated - Packages need to avoid global side effects, and return themselves as simple tables (Lua 5.2 started enforcing this on the C side) - Provide standard AMI instances that people can launch (already loosely done by the community). We can load it with many standard+optional packages and/or provide one line option to update to latest. ## Build Infrastructure Requirements - Prepare core distro release - Professional Continuous build for distro and individual core projects - Continuous build for GPU - continuous build should include testing - The distro should be build and tested at every pull into any of the member projects - CI for Linux and OSX ## Other Questions? - If there is a project that seems good from outside or consortium, how do we endorse/improve/modify that? - do we put some technical criteria to do that? - being able to do pull requests? - Licensing? - or maybe maintain a list of suggested packages? - when does existence of a package stop us from developing the same in core torch? - **Soumith**: I think this should largely be community driven and by popularity. Top starred or watched repos in the ecosystem would be a good start. 
Storage.c000066400000000000000000000006241316246254300126360ustar00rootroot00000000000000#include "general.h" #define torch_Storage_(NAME) TH_CONCAT_4(torch_,Real,Storage_,NAME) #define THFile_readRealRaw TH_CONCAT_3(THFile_read, Real, Raw) #define THFile_writeRealRaw TH_CONCAT_3(THFile_write, Real, Raw) #define torch_Storage TH_CONCAT_STRING_3(torch.,Real,Storage) #include "generic/Storage.c" #include "THGenerateAllTypes.h" #include "generic/Storage.c" #include "THGenerateHalfType.h" Tensor.c000066400000000000000000000006201316246254300125000ustar00rootroot00000000000000#include "general.h" #define torch_Storage_(NAME) TH_CONCAT_4(torch_,Real,Storage_,NAME) #define torch_Storage TH_CONCAT_STRING_3(torch.,Real,Storage) #define torch_Tensor_(NAME) TH_CONCAT_4(torch_,Real,Tensor_,NAME) #define torch_Tensor TH_CONCAT_STRING_3(torch.,Real,Tensor) #include "generic/Tensor.c" #include "THGenerateAllTypes.h" #include "generic/Tensor.c" #include "THGenerateHalfType.h" Tensor.lua000066400000000000000000000405001316246254300130400ustar00rootroot00000000000000-- additional methods for Storage local Storage = {} -- additional methods for Tensor local Tensor = {} -- types local types = {'Byte', 'Char', 'Short', 'Int', 'Long', 'Float', 'Half', 'Double'} -- Lua 5.2 compatibility local log10 = math.log10 or function(x) return math.log(x, 10) end -- tostring() functions for Tensor and Storage local function Storage__printformat(self) if self:size() == 0 then return "", nil, 0 end local intMode = true local type = torch.typename(self) -- if type == 'torch.FloatStorage' or type == 'torch.DoubleStorage' then for i=1,self:size() do if self[i] ~= math.ceil(self[i]) then intMode = false break end end -- end local tensor = torch.DoubleTensor(torch.DoubleStorage(self:size()):copy(self), 1, self:size()):abs() local expMin = tensor:min() if expMin ~= 0 then expMin = math.floor(log10(expMin)) + 1 else expMin = 1 end local expMax = tensor:max() if expMax ~= 0 then expMax = 
math.floor(log10(expMax)) + 1 else expMax = 1 end local format local scale local sz if intMode then if expMax > 9 then format = "%11.4e" sz = 11 else format = "%SZd" sz = expMax + 1 end else if expMax-expMin > 4 then format = "%SZ.4e" sz = 11 if math.abs(expMax) > 99 or math.abs(expMin) > 99 then sz = sz + 1 end else if expMax > 5 or expMax < 0 then format = "%SZ.4f" sz = 7 scale = math.pow(10, expMax-1) else format = "%SZ.4f" if expMax == 0 then sz = 7 else sz = expMax+6 end end end end format = string.gsub(format, 'SZ', sz) if scale == 1 then scale = nil end return format, scale, sz end function Storage.__tostring__(self) local strt = {} local format,scale = Storage__printformat(self) if format:sub(2,4) == 'nan' then format = '%f' end if scale then table.insert(strt, string.format('%g', scale) .. ' *\n') for i = 1,self:size() do table.insert(strt, string.format(format, self[i]/scale) .. '\n') end else for i = 1,self:size() do table.insert(strt, string.format(format, self[i]) .. '\n') end end table.insert(strt, '[' .. torch.typename(self) .. ' of size ' .. self:size() .. ']\n') local str = table.concat(strt) return str end for _,type in ipairs(types) do local metatable = torch.getmetatable('torch.' .. type .. 'Storage') for funcname, func in pairs(Storage) do rawset(metatable, funcname, func) end end local function Tensor__printMatrix(self, indent) local format,scale,sz = Storage__printformat(self:storage()) if format:sub(2,4) == 'nan' then format = '%f' end -- print('format = ' .. format) scale = scale or 1 indent = indent or '' local strt = {indent} local nColumnPerLine = math.floor((80-#indent)/(sz+1)) -- print('sz = ' .. sz .. ' and nColumnPerLine = ' .. 
nColumnPerLine) local firstColumn = 1 local lastColumn = -1 while firstColumn <= self:size(2) do if firstColumn + nColumnPerLine - 1 <= self:size(2) then lastColumn = firstColumn + nColumnPerLine - 1 else lastColumn = self:size(2) end if nColumnPerLine < self:size(2) then if firstColumn ~= 1 then table.insert(strt, '\n') end table.insert(strt, 'Columns ' .. firstColumn .. ' to ' .. lastColumn .. '\n' .. indent) end if scale ~= 1 then table.insert(strt, string.format('%g', scale) .. ' *\n ' .. indent) end for l=1,self:size(1) do local row = self:select(1, l) for c=firstColumn,lastColumn do table.insert(strt, string.format(format, row[c]/scale)) if c == lastColumn then table.insert(strt, '\n') if l~=self:size(1) then if scale ~= 1 then table.insert(strt, indent .. ' ') else table.insert(strt, indent) end end else table.insert(strt, ' ') end end end firstColumn = lastColumn + 1 end local str = table.concat(strt) return str end local function Tensor__printTensor(self) local counter = torch.LongStorage(self:nDimension()-2) local strt = {''} local finished counter:fill(1) counter[1] = 0 while true do for i=1,self:nDimension()-2 do counter[i] = counter[i] + 1 if counter[i] > self:size(i) then if i == self:nDimension()-2 then finished = true break end counter[i] = 1 else break end end if finished then break end -- print(counter) if #strt > 1 then table.insert(strt, '\n') end table.insert(strt, '(') local tensor = self for i=1,self:nDimension()-2 do tensor = tensor:select(1, counter[i]) table.insert(strt, counter[i] .. ',') end table.insert(strt, '.,.) = \n') table.insert(strt, Tensor__printMatrix(tensor, ' ')) end return table.concat(strt) end function Tensor.__tostring__(self) local strt = {''} if self:nDimension() == 0 then table.insert(strt, '[' .. torch.typename(self) .. 
' with no dimension]\n') else local tensor = torch.DoubleTensor():resize(self:size()):copy(self) if tensor:nDimension() == 1 then local format,scale,sz = Storage__printformat(tensor:storage()) if format:sub(2,4) == 'nan' then format = '%f' end if scale then table.insert(strt, string.format('%g', scale) .. ' *\n') for i = 1,tensor:size(1) do table.insert(strt, string.format(format, tensor[i]/scale) .. '\n') end else for i = 1,tensor:size(1) do table.insert(strt, string.format(format, tensor[i]) .. '\n') end end table.insert(strt, '[' .. torch.typename(self) .. ' of size ' .. tensor:size(1) .. ']\n') elseif tensor:nDimension() == 2 then table.insert(strt, Tensor__printMatrix(tensor)) table.insert(strt, '[' .. torch.typename(self) .. ' of size ' .. tensor:size(1) .. 'x' .. tensor:size(2) .. ']\n') else table.insert(strt, Tensor__printTensor(tensor)) table.insert(strt, '[' .. torch.typename(self) .. ' of size ') for i=1,tensor:nDimension() do table.insert(strt, tensor:size(i)) if i ~= tensor:nDimension() then table.insert(strt, 'x') end end table.insert(strt, ']\n') end end return table.concat(strt) end function Tensor.type(self,type) local current = torch.typename(self) if not type then return current end if type ~= current then local new = torch.getmetatable(type).new() if self:nElement() > 0 then new:resize(self:size()):copy(self) end return new else return self end end function Tensor.typeAs(self,tensor) return self:type(tensor:type()) end function Tensor.byte(self) return self:type('torch.ByteTensor') end function Tensor.char(self) return self:type('torch.CharTensor') end function Tensor.short(self) return self:type('torch.ShortTensor') end function Tensor.int(self) return self:type('torch.IntTensor') end function Tensor.long(self) return self:type('torch.LongTensor') end function Tensor.float(self) return self:type('torch.FloatTensor') end function Tensor.double(self) return self:type('torch.DoubleTensor') end function Tensor.half(self) return 
self:type('torch.HalfTensor') end function Tensor.real(self) return self:type(torch.getdefaulttensortype()) end function Tensor.expand(result,tensor,...) -- get sizes local sizes = {...} local t = torch.type(tensor) if (t == 'number' or t == 'torch.LongStorage') then table.insert(sizes,1,tensor) tensor = result result = tensor.new() end -- check type local size if torch.type(sizes[1])=='torch.LongStorage' then size = sizes[1] else size = torch.LongStorage(#sizes) for i,s in ipairs(sizes) do size[i] = s end end -- get dimensions local tensor_dim = tensor:dim() local tensor_stride = tensor:stride() local tensor_size = tensor:size() -- check nb of dimensions if #size ~= tensor:dim() then error('the number of dimensions provided must equal tensor:dim()') end -- create a new geometry for tensor: for i = 1,tensor_dim do if tensor_size[i] == 1 then tensor_size[i] = size[i] tensor_stride[i] = 0 elseif tensor_size[i] ~= size[i] then error('incorrect size: only supporting singleton expansion (size=1)') end end -- create new view, with singleton expansion: result:set(tensor:storage(), tensor:storageOffset(), tensor_size, tensor_stride) return result end torch.expand = Tensor.expand function Tensor.expandAs(result,tensor,template) if template then return result:expand(tensor,template:size()) end return result:expand(tensor:size()) end torch.expandAs = Tensor.expandAs function Tensor.repeatTensor(result,tensor,...) 
-- get sizes local sizes = {...} local t = torch.type(tensor) if (t == 'number' or t == 'torch.LongStorage') then table.insert(sizes,1,tensor) tensor = result result = tensor.new() end -- if not contiguous, then force the tensor to be contiguous if not tensor:isContiguous() then tensor = tensor:clone() end -- check type local size if torch.type(sizes[1])=='torch.LongStorage' then size = sizes[1] else size = torch.LongStorage(#sizes) for i,s in ipairs(sizes) do size[i] = s end end if size:size() < tensor:dim() then error('Number of dimensions of repeat dims can not be smaller than number of dimensions of tensor') end local xtensor = tensor.new():set(tensor) local xsize = xtensor:size():totable() for i=1,size:size()-tensor:dim() do table.insert(xsize,1,1) end size = torch.DoubleTensor(xsize):cmul(torch.DoubleTensor(size:totable())):long():storage() xtensor:resize(torch.LongStorage(xsize)) result:resize(size) local urtensor = result.new(result) for i=1,xtensor:dim() do urtensor = urtensor:unfold(i,xtensor:size(i),xtensor:size(i)) end for i=1,urtensor:dim()-xtensor:dim() do table.insert(xsize,1,1) end xtensor:resize(torch.LongStorage(xsize)) local xxtensor = xtensor:expandAs(urtensor) urtensor:copy(xxtensor) return result end torch.repeatTensor = Tensor.repeatTensor --- One of the size elements can be -1, --- a new LongStorage is then returned. --- The length of the unspecified dimension --- is inferred from the number of remaining elements. 
local function specifyFully(size, nElements) local nCoveredElements = 1 local remainingDim = nil local sizes = size:totable() for i = 1, #sizes do local wantedDimSize = sizes[i] if wantedDimSize == -1 then if remainingDim then error("Only one of torch.view dimensions can be -1.") end remainingDim = i else nCoveredElements = nCoveredElements * wantedDimSize end end if not remainingDim then return size end assert(nElements % nCoveredElements == 0, "The number of covered elements is not a multiple of all elements.") local copy = torch.LongStorage(sizes) copy[remainingDim] = nElements / nCoveredElements return copy end -- TODO : This should be implemented in TH and and wrapped. function Tensor.view(result, src, ...) local size = ... local view, tensor local function istensor(tensor) return torch.typename(tensor) and torch.typename(tensor):find('torch.*Tensor') end local function isstorage(storage) return torch.typename(storage) and torch.typename(storage) == 'torch.LongStorage' end if istensor(result) and istensor(src) and type(size) == 'number' then size = torch.LongStorage{...} view = result tensor = src elseif istensor(result) and istensor(src) and isstorage(size) then size = size view = result tensor = src elseif istensor(result) and isstorage(src) and size == nil then size = src tensor = result view = tensor.new() elseif istensor(result) and type(src) == 'number' then size = {...} table.insert(size,1,src) size = torch.LongStorage(size) tensor = result view = tensor.new() else local t1 = 'torch.Tensor, torch.Tensor, number [, number ]*' local t2 = 'torch.Tensor, torch.Tensor, torch.LongStorage' local t3 = 'torch.Tensor, torch.LongStorage' local t4 = 'torch.Tensor, number [, number ]*' error(string.format('torch.view, expected (%s) or\n (%s) or\n (%s)\n or (%s)', t1, t2, t3, t4)) end local origNElement = tensor:nElement() size = specifyFully(size, origNElement) assert(tensor:isContiguous(), "expecting a contiguous tensor") view:set(tensor:storage(), 
tensor:storageOffset(), size) if view:nElement() ~= origNElement then local inputSize = table.concat(tensor:size():totable(), "x") local outputSize = table.concat(size:totable(), "x") error(string.format("Wrong size for view. Input size: %s. Output size: %s", inputSize, outputSize)) end return view end torch.view = Tensor.view function Tensor.viewAs(result, src, template) if template and torch.typename(template) then return result:view(src, template:size()) elseif template == nil then template = src src = result result = src.new() return result:view(src, template:size()) else local t1 = 'torch.Tensor, torch.Tensor, torch.LongStorage' local t2 = 'torch.Tensor, torch.LongStorage' error(string.format('expecting (%s) or (%s)', t1, t2)) end end torch.viewAs = Tensor.viewAs function Tensor.split(result, tensor, splitSize, dim) if torch.type(result) ~= 'table' then dim = splitSize splitSize = tensor tensor = result result = {} else -- empty existing result table before using it for k,v in pairs(result) do result[k] = nil end end dim = dim or 1 local start = 1 while start <= tensor:size(dim) do local size = math.min(splitSize, tensor:size(dim) - start + 1) local split = tensor:narrow(dim, start, size) table.insert(result, split) start = start + size end return result end torch.split = Tensor.split function Tensor.chunk(result, tensor, nChunk, dim) if torch.type(result) ~= 'table' then dim = nChunk nChunk = tensor tensor = result result = {} end dim = dim or 1 local splitSize = math.ceil(tensor:size(dim)/nChunk) return torch.split(result, tensor, splitSize, dim) end torch.chunk = Tensor.chunk function Tensor.totable(tensor) local result = {} local dim = tensor:dim() if dim == 1 then tensor:apply(function(i) table.insert(result, i) end) elseif dim > 0 then for i = 1, tensor:size(1) do table.insert(result, tensor[i]:totable()) end end return result end torch.totable = Tensor.totable function Tensor.permute(tensor, ...) 
local perm = {...} local nDims = tensor:dim() assert(#perm == nDims, 'Invalid permutation') local j for i, p in ipairs(perm) do if p ~= i and p ~= 0 then j = i repeat assert(0 < perm[j] and perm[j] <= nDims, 'Invalid permutation') tensor = tensor:transpose(j, perm[j]) j, perm[j] = perm[j], 0 until perm[j] == i perm[j] = j end end return tensor end torch.permute = Tensor.permute for _,type in ipairs(types) do local metatable = torch.getmetatable('torch.' .. type .. 'Tensor') for funcname, func in pairs(Tensor) do if funcname ~= 'totable' or type ~='Half' then rawset(metatable, funcname, func) else local function Tensor__totable(self) local host_tensor = self:float() return self:float():totable() end rawset(torch.getmetatable('torch.HalfTensor'), 'totable', Tensor__totable) end end end TensorMath.lua000066400000000000000000001512711316246254300136620ustar00rootroot00000000000000local wrap = require 'cwrap' require 'torchcwrap' local interface = wrap.CInterface.new() local method = wrap.CInterface.new() local argtypes = wrap.CInterface.argtypes argtypes['ptrdiff_t'] = wrap.types.ptrdiff_t interface:print([[ #include "TH.h" #include "THMath.h" #include "luaT.h" #include "utils.h" ]]) -- specific to torch: we generate a 'dispatch' function -- first we create a helper function -- note that it let the "torch" table on the stack interface:print([[ static const void* torch_istensortype(lua_State *L, const char *tname) { if(!tname) return NULL; if(!luaT_pushmetatable(L, tname)) return NULL; lua_pushstring(L, "torch"); lua_rawget(L, -2); if(lua_istable(L, -1)) return tname; else { lua_pop(L, 2); return NULL; } return NULL; } ]]) interface:print([[ static int torch_isnonemptytable(lua_State *L, int idx) { int empty; if (!lua_istable(L, idx)) return 0; lua_rawgeti(L, idx, 1); empty = lua_isnil(L, -1); lua_pop(L, 1); return !empty; } ]]) interface:print([[ static const void* torch_istensorarray(lua_State *L, int idx) { const char* tname; int tensor_idx; if 
(!torch_isnonemptytable(L, idx)) return 0; lua_checkstack(L, 3); lua_rawgeti(L, idx, 1); tensor_idx = lua_gettop(L); tname = (torch_istensortype(L, luaT_typename(L, -1))); lua_remove(L, tensor_idx); return tname; } ]]) interface.dispatchregistry = {} function interface:wrap(name, ...) -- usual stuff wrap.CInterface.wrap(self, name, ...) -- dispatch function if not interface.dispatchregistry[name] then interface.dispatchregistry[name] = true table.insert(interface.dispatchregistry, {name=name, wrapname=string.format("torch_%s", name)}) interface:print(string.gsub([[ static int torch_NAME(lua_State *L) { int narg = lua_gettop(L); const void *tname; if(narg >= 1 && (tname = torch_istensortype(L, luaT_typename(L, 1)))) /* first argument is tensor? */ { } else if(narg >= 2 && (tname = torch_istensortype(L, luaT_typename(L, 2)))) /* second? */ { } else if(narg >= 1 && (tname = torch_istensorarray(L, 1))) /* torch table argument? */ { } else if(narg >= 1 && lua_type(L, narg) == LUA_TSTRING && (tname = torch_istensortype(L, lua_tostring(L, narg)))) /* do we have a valid tensor type string then? 
*/ { lua_remove(L, -2); } else if(!(tname = torch_istensortype(L, torch_getdefaulttensortype(L)))) luaL_error(L, "internal error: the default tensor type does not seem to be an actual tensor"); lua_pushstring(L, "NAME"); lua_rawget(L, -2); if(lua_isfunction(L, -1)) { lua_insert(L, 1); lua_pop(L, 2); /* the two tables we put on the stack above */ lua_call(L, lua_gettop(L)-1, LUA_MULTRET); } else return luaL_error(L, "%s does not implement the torch.NAME() function", tname); return lua_gettop(L); } ]], 'NAME', name)) end end function interface:dispatchregister(name) local txt = self.txt table.insert(txt, string.format('static const struct luaL_Reg %s [] = {', name)) for _,reg in ipairs(self.dispatchregistry) do table.insert(txt, string.format('{"%s", %s},', reg.name, reg.wrapname)) end table.insert(txt, '{NULL, NULL}') table.insert(txt, '};') table.insert(txt, '') self.dispatchregistry = {} end interface:print('/* WARNING: autogenerated file */') interface:print('') local function wrap(...) local args = {...} -- interface interface:wrap(...) 
-- method: we override things possibly in method table field for _,x in ipairs(args) do if type(x) == 'table' then -- ok, now we have a list of args for _, arg in ipairs(x) do if arg.method then for k,v in pairs(arg.method) do if v == 'nil' then -- special case, we erase the field arg[k] = nil else arg[k] = v end end end end end end local unpack = unpack or table.unpack method:wrap(unpack(args)) end local reals = {ByteTensor='unsigned char', CharTensor='char', ShortTensor='short', IntTensor='int', LongTensor='long', FloatTensor='float', HalfTensor='half', DoubleTensor='double'} local accreals = {ByteTensor='long', CharTensor='long', ShortTensor='long', IntTensor='long', LongTensor='long', FloatTensor='double', HalfTensor='float', DoubleTensor='double'} for _,Tensor in ipairs({"ByteTensor", "CharTensor", "ShortTensor", "IntTensor", "LongTensor", "FloatTensor", "HalfTensor", "DoubleTensor"}) do local real = reals[Tensor] local accreal = accreals[Tensor] function interface.luaname2wrapname(self, name) return string.format('torch_%s_%s', Tensor, name) end function method.luaname2wrapname(self, name) return string.format('m_torch_%s_%s', Tensor, name) end local function cname(name) return string.format('TH%s_%s', Tensor, name) end local function lastdim(argn) return function(arg) return string.format("TH%s_nDimension(%s)", Tensor, arg.args[argn]:carg()) end end local function lastdimarray(argn) return function(arg) return string.format("TH%s_nDimension(arg%d_data[0])", Tensor, arg.args[argn].i) end end if Tensor ~= 'HalfTensor' then wrap("zero", cname("zero"), {{name=Tensor, returned=true}}) wrap("fill", cname("fill"), {{name=Tensor, returned=true}, {name=real}}) wrap("zeros", cname("zeros"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name="LongArg"}}) wrap("ones", cname("ones"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name="LongArg"}}) wrap("reshape", cname("reshape"), {{name=Tensor, default=true, returned=true}, 
{name=Tensor}, {name="LongArg"}}) wrap("gather", cname("gather"), {{name=Tensor, default=true, returned=true, init=function(arg) return table.concat( { arg.__metatable.init(arg), string.format("THLongStorage* %s_size = THLongTensor_newSizeOf(%s);", arg:carg(), arg.args[4]:carg()), string.format("TH%s_resize(%s, %s_size, NULL);", Tensor, arg:carg(), arg:carg()), string.format("THLongStorage_free(%s_size);", arg:carg()) }, '\n') end }, {name=Tensor}, {name="index"}, {name="IndexTensor", noreadadd=true}}) wrap("scatter", cname("scatter"), {{name=Tensor, returned=true}, {name="index"}, {name="IndexTensor", noreadadd=true}, {name=Tensor}}, cname("scatterFill"), {{name=Tensor, returned=true}, {name="index"}, {name="IndexTensor", noreadadd=true}, {name=real}}) wrap("dot", cname("dot"), {{name=Tensor}, {name=Tensor}, {name=accreal, creturned=true}}) wrap("equal", cname("equal"), {{name=Tensor}, {name=Tensor}, {name="boolean", creturned=true}}) wrap("add", cname("add"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=real}}, cname("cadd"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=real, default=1}, {name=Tensor}}) wrap("csub", cname("sub"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=real}}, cname("csub"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=real, default=1}, {name=Tensor}}) wrap("mul", cname("mul"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=real}}) wrap("div", cname("div"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=real}}) wrap("lshift", cname("lshift"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=real}}) 
wrap("rshift", cname("rshift"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=real}}) wrap("fmod", cname("fmod"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=real}}) wrap("remainder", cname("remainder"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=real}}) wrap("bitand", cname("bitand"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=real}}) wrap("bitor", cname("bitor"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=real}}) wrap("bitxor", cname("bitxor"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=real}}) -- mod alias wrap("mod", cname("fmod"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=real}}) wrap("clamp", cname("clamp"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=real}, {name=real}}) wrap("match", cname("match"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor}, {name=Tensor}, {name=real, default=1} }) wrap("cmul", cname("cmul"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=Tensor}}) wrap("cpow", cname("cpow"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=Tensor}}) wrap("cdiv", cname("cdiv"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=Tensor}}) wrap("clshift", cname("clshift"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=Tensor}}) wrap("crshift", 
cname("crshift"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=Tensor}}) wrap("cfmod", cname("cfmod"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=Tensor}}) wrap("cremainder", cname("cremainder"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=Tensor}}) wrap("cbitand", cname("cbitand"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=Tensor}}) wrap("cbitor", cname("cbitor"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=Tensor}}) wrap("cbitxor", cname("cbitxor"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=Tensor}}) -- cmod alias wrap("cmod", cname("cfmod"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=Tensor}}) wrap("addcmul", cname("addcmul"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=real, default=1}, {name=Tensor}, {name=Tensor}}) wrap("addcdiv", cname("addcdiv"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=real, default=1}, {name=Tensor}, {name=Tensor}}) wrap("mv", cname("addmv"), {{name=Tensor, default=true, returned=true, method={default='nil'}, init=function(arg) return table.concat( { arg.__metatable.init(arg), string.format("TH%s_resize1d(%s, %s->size[0]);", Tensor, arg:carg(), arg.args[5]:carg()) }, '\n') end, precall=function(arg) return table.concat( { string.format("TH%s_zero(%s);", Tensor, arg:carg()), arg.__metatable.precall(arg) }, '\n') end, }, {name=real, default=0, invisible=true}, {name=Tensor, default=1, invisible=true}, {name=real, default=1, invisible=true}, 
{name=Tensor, dim=2}, {name=Tensor, dim=1}} ) wrap("mm", cname("addmm"), {{name=Tensor, default=true, returned=true, method={default='nil'}, init=function(arg) return table.concat( { arg.__metatable.init(arg), string.format("TH%s_resize2d(%s, %s->size[0], %s->size[1]);", Tensor, arg:carg(), arg.args[5]:carg(), arg.args[6]:carg()) }, '\n') end, precall=function(arg) return table.concat( { string.format("TH%s_zero(%s);", Tensor, arg:carg()), arg.__metatable.precall(arg) }, '\n') end, }, {name=real, default=0, invisible=true}, {name=Tensor, default=1, invisible=true}, {name=real, default=1, invisible=true}, {name=Tensor, dim=2}, {name=Tensor, dim=2}} ) wrap("bmm", cname("baddbmm"), {{name=Tensor, default=true, returned=true, method={default='nil'}, init=function(arg) return table.concat( { arg.__metatable.init(arg), string.format("TH%s_resize3d(%s, %s->size[0], %s->size[1], %s->size[2]);", Tensor, arg:carg(), arg.args[5]:carg(), arg.args[5]:carg(), arg.args[6]:carg()) }, '\n') end, precall=function(arg) return table.concat( { string.format("TH%s_zero(%s);", Tensor, arg:carg()), arg.__metatable.precall(arg) }, '\n') end, }, {name=real, default=0, invisible=true}, {name=Tensor, default=1, invisible=true}, {name=real, default=1, invisible=true}, {name=Tensor, dim=3}, {name=Tensor, dim=3}} ) wrap("ger", cname("addr"), {{name=Tensor, default=true, returned=true, method={default='nil'}, init=function(arg) return table.concat( { arg.__metatable.init(arg), string.format("TH%s_resize2d(%s, %s->size[0], %s->size[0]);", Tensor, arg:carg(), arg.args[5]:carg(), arg.args[6]:carg()) }, '\n') end, precall=function(arg) return table.concat( { string.format("TH%s_zero(%s);", Tensor, arg:carg()), arg.__metatable.precall(arg) }, '\n') end }, {name=real, default=1, invisible=true}, {name=Tensor, default=1, invisible=true}, {name=real, default=1, invisible=true}, {name=Tensor, dim=1}, {name=Tensor, dim=1}} ) for _,f in ipairs({ {name="addmv", dim1=1, dim2=2, dim3=1}, {name="addmm", dim1=2, 
dim2=2, dim3=2}, {name="addr", dim1=2, dim2=1, dim3=1}, {name="addbmm", dim1=2, dim2=3, dim3=3}, {name="baddbmm", dim1=3, dim2=3, dim3=3}, } ) do interface:wrap(f.name, cname(f.name), {{name=Tensor, default=true, returned=true}, {name=real, default=1}, {name=Tensor, dim=f.dim1}, {name=real, default=1}, {name=Tensor, dim=f.dim2}, {name=Tensor, dim=f.dim3}}) -- there is an ambiguity here, hence the more complicated setup method:wrap(f.name, cname(f.name), {{name=Tensor, returned=true, dim=f.dim1}, {name=real, default=1, invisible=true}, {name=Tensor, default=1, dim=f.dim1}, {name=real, default=1}, {name=Tensor, dim=f.dim2}, {name=Tensor, dim=f.dim3}}, cname(f.name), {{name=Tensor, returned=true, dim=f.dim1}, {name=real}, {name=Tensor, default=1, dim=f.dim1}, {name=real}, {name=Tensor, dim=f.dim2}, {name=Tensor, dim=f.dim3}}) end wrap("numel", cname("numel"), {{name=Tensor}, {name="ptrdiff_t", creturned=true}}) for _,name in ipairs({"cumsum", "cumprod"}) do wrap(name, cname(name), {{name=Tensor, default=true, returned=true}, {name=Tensor}, {name="index", default=1}}) end wrap("sum", cname("sumall"), {{name=Tensor}, {name=accreal, creturned=true}}, cname("sum"), {{name=Tensor, default=true, returned=true}, {name=Tensor}, {name="index"}, {name="boolean", default=true, invisible=true}}) wrap("prod", cname("prodall"), {{name=Tensor}, {name=accreal, creturned=true}}, cname("prod"), {{name=Tensor, default=true, returned=true}, {name=Tensor}, {name="index"}, {name="boolean", default=true, invisible=true}}) for _,name in ipairs({"min", "max"}) do wrap(name, cname(name .. 
"all"), {{name=Tensor}, {name=real, creturned=true}}, cname(name), {{name=Tensor, default=true, returned=true}, {name="IndexTensor", default=true, returned=true, noreadadd=true}, {name=Tensor}, {name="index"}, {name="boolean", default=true, invisible=true}}) end for _,name in ipairs({"cmin", "cmax"}) do wrap(name, cname(name), {{name=Tensor, default=true, returned=true}, {name=Tensor, method={default=1}}, {name=Tensor}}, cname(name .. "Value"), {{name=Tensor, default=true, returned=true}, {name=Tensor, method={default=1}}, {name=real}}) end wrap("trace", cname("trace"), {{name=Tensor}, {name=accreal, creturned=true}}) wrap("cross", cname("cross"), {{name=Tensor, default=true, returned=true}, {name=Tensor}, {name=Tensor}, {name="index", default=0}}) wrap("diag", cname("diag"), {{name=Tensor, default=true, returned=true}, {name=Tensor}, {name="long", default=0}}) wrap("eye", cname("eye"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name="long"}, {name="long", default=0}}) wrap("range", cname("range"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=accreal}, {name=accreal}, {name=accreal, default=1}}) wrap("randperm", cname("randperm"), {{name=Tensor, default=true, returned=true, method={default='nil'}, postcall=function(arg) return table.concat( { arg.__metatable.postcall(arg), string.format("TH%s_add(%s, %s, 1);", Tensor, arg:carg(), arg:carg()) }, '\n') end}, {name="Generator", default=true}, {name="long"}}) wrap("sort", cname("sort"), {{name=Tensor, default=true, returned=true}, {name="IndexTensor", default=true, returned=true, noreadadd=true}, {name=Tensor}, {name="index", default=lastdim(3)}, {name="boolean", default=0}}) wrap("topk", cname("topk"), {{name=Tensor, default=true, returned=true}, {name="IndexTensor", default=true, returned=true, noreadadd=true}, {name=Tensor}, {name="long", default=1}, {name="index", default=lastdim(3)}, {name="boolean", default=0}, {name="boolean", default=0}}) wrap("kthvalue", 
cname("kthvalue"), {{name=Tensor, default=true, returned=true}, {name="IndexTensor", default=true, returned=true, noreadadd=true}, {name=Tensor}, {name="long"}, {name="index", default=lastdim(3)}, {name="boolean", default=true, invisible=true}}) wrap("mode", cname("mode"), {{name=Tensor, default=true, returned=true}, {name="IndexTensor", default=true, returned=true, noreadadd=true}, {name=Tensor}, {name="index", default=lastdim(3)}, {name="boolean", default=true, invisible=true}}) wrap("median", cname("median"), {{name=Tensor, default=true, returned=true}, {name="IndexTensor", default=true, returned=true, noreadadd=true}, {name=Tensor}, {name="index", default=lastdim(3)}, {name="boolean", default=true, invisible=true}}) wrap("tril", cname("tril"), {{name=Tensor, default=true, returned=true}, {name=Tensor}, {name="int", default=0}}) wrap("triu", cname("triu"), {{name=Tensor, default=true, returned=true}, {name=Tensor}, {name="int", default=0}}) wrap("cat", cname("cat"), {{name=Tensor, default=true, returned=true}, {name=Tensor}, {name=Tensor}, {name="index", default=-1}}, cname("catArray"), {{name=Tensor, default=true, returned=true}, {name=Tensor .. 
"Array"}, {name="index", default=-1}}) if Tensor == 'ByteTensor' then -- we declare this only once interface:print( [[ static long THRandom_random2__(THGenerator *gen, long a, long b) { THArgCheck(b >= a, 2, "upper bound must be larger than lower bound"); return((THRandom_random(gen) % (b+1-a)) + a); } static long THRandom_random1__(THGenerator *gen, long b) { THArgCheck(b > 0, 1, "upper bound must be strictly positive"); return(THRandom_random(gen) % b + 1); } ]]) end interface:print(string.gsub( [[ static void THTensor_random2__(THTensor *self, THGenerator *gen, long a, long b) { THArgCheck(b >= a, 2, "upper bound must be larger than lower bound"); TH_TENSOR_APPLY(real, self, *self_data = ((THRandom_random(gen) % (b+1-a)) + a);) } static void THTensor_random1__(THTensor *self, THGenerator *gen, long b) { THArgCheck(b > 0, 1, "upper bound must be strictly positive"); TH_TENSOR_APPLY(real, self, *self_data = (THRandom_random(gen) % b + 1);) } ]], 'Tensor', Tensor):gsub('real', real)) wrap('random', 'THRandom_random2__', {{name='Generator', default=true}, {name='long'}, {name='long'}, {name='long', creturned=true}}, 'THRandom_random1__', {{name='Generator', default=true}, {name='long'}, {name='long', creturned=true}}, 'THRandom_random', {{name='Generator', default=true}, {name='long', creturned=true}}, cname("random2__"), {{name=Tensor, returned=true}, {name='Generator', default=true}, {name='long'}, {name='long'}}, cname("random1__"), {{name=Tensor, returned=true}, {name='Generator', default=true}, {name='long'}}, cname("random"), {{name=Tensor, returned=true}, {name='Generator', default=true}}) wrap("geometric", "THRandom_geometric", {{name="Generator", default=true}, {name="double"}, {name="double", creturned=true}}, cname("geometric"), {{name=Tensor, returned=true}, {name="Generator", default=true}, {name="double"}}) wrap("bernoulli", "THRandom_bernoulli", {{name="Generator", default=true}, {name="double", default=0.5}, {name="double", creturned=true}}, 
cname("bernoulli"), {{name=Tensor, returned=true}, {name="Generator", default=true}, {name="double", default=0.5}}, cname("bernoulli_FloatTensor"), {{name=Tensor, returned=true}, {name="Generator", default=true}, {name="FloatTensor"}}, cname("bernoulli_DoubleTensor"), {{name=Tensor, returned=true}, {name="Generator", default=true}, {name="DoubleTensor"}}) wrap("squeeze", cname("squeeze"), {{name=Tensor, default=true, returned=true, postcall=function(arg) local txt = {} if arg.returned then table.insert(txt, string.format('if(arg%d->nDimension == 1 && arg%d->size[0] == 1)', arg.i, arg.i)) -- number table.insert(txt, string.format('lua_pushnumber(L, (lua_Number)(*TH%s_data(arg%d)));', Tensor, arg.i)) end return table.concat(txt, '\n') end}, {name=Tensor}}, cname("squeeze1d"), {{name=Tensor, default=true, returned=true, postcall= function(arg) local txt = {} if arg.returned then table.insert(txt, string.format('if(!hasdims && arg%d->nDimension == 1 && arg%d->size[0] == 1)', arg.i, arg.i)) -- number table.insert(txt, string.format('lua_pushnumber(L, (lua_Number)(*TH%s_data(arg%d)));}', Tensor, arg.i)) end return table.concat(txt, '\n') end}, {name=Tensor, precall= function(arg) return string.format('{int hasdims = arg%d->nDimension > 1;', arg.i) end}, {name="index"}}) wrap("sign", cname("sign"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}}) wrap("conv2", cname("conv2Dmul"), {{name=Tensor, default=true, returned=true}, {name=real, default=0, invisible=true}, {name=real, default=1, invisible=true}, {name=Tensor, dim=2}, {name=Tensor, dim=2}, {name=real, default=1, invisible=true}, {name=real, default=1, invisible=true}, {name='charoption', values={'V', 'F'}, default='V'}, {name='charoption', default="C", invisible=true}}, cname("conv2Dcmul"), {{name=Tensor, default=true, returned=true}, {name=real, default=0, invisible=true}, {name=real, default=1, invisible=true}, {name=Tensor, dim=3}, {name=Tensor, dim=3}, 
{name=real, default=1, invisible=true}, {name=real, default=1, invisible=true}, {name='charoption', values={'V', 'F'}, default='V'}, {name='charoption', default="C", invisible=true}}, cname("conv2Dmv"), {{name=Tensor, default=true, returned=true}, {name=real, default=0, invisible=true}, {name=real, default=1, invisible=true}, {name=Tensor, dim=3}, {name=Tensor, dim=4}, {name=real, default=1, invisible=true}, {name=real, default=1, invisible=true}, {name='charoption', values={'V', 'F'}, default='V'}, {name='charoption', default="C", invisible=true}} ) wrap("xcorr2", cname("conv2Dmul"), {{name=Tensor, default=true, returned=true}, {name=real, default=0, invisible=true}, {name=real, default=1, invisible=true}, {name=Tensor, dim=2}, {name=Tensor, dim=2}, {name=real, default=1, invisible=true}, {name=real, default=1, invisible=true}, {name='charoption', values={'V', 'F'}, default='V'}, {name='charoption', default="X", invisible=true}}, cname("conv2Dcmul"), {{name=Tensor, default=true, returned=true}, {name=real, default=0, invisible=true}, {name=real, default=1, invisible=true}, {name=Tensor, dim=3}, {name=Tensor, dim=3}, {name=real, default=1, invisible=true}, {name=real, default=1, invisible=true}, {name='charoption', values={'V', 'F'}, default='V'}, {name='charoption', default="X", invisible=true}}, cname("conv2Dmv"), {{name=Tensor, default=true, returned=true}, {name=real, default=0, invisible=true}, {name=real, default=1, invisible=true}, {name=Tensor, dim=3}, {name=Tensor, dim=4}, {name=real, default=1, invisible=true}, {name=real, default=1, invisible=true}, {name='charoption', values={'V', 'F'}, default='V'}, {name='charoption', default="X", invisible=true}} ) wrap("conv3", cname("conv3Dmul"), {{name=Tensor, default=true, returned=true}, {name=real, default=0, invisible=true}, {name=real, default=1, invisible=true}, {name=Tensor, dim=3}, {name=Tensor, dim=3}, {name=real, default=1, invisible=true}, {name=real, default=1, invisible=true}, {name=real, default=1, 
invisible=true}, {name='charoption', values={'V', 'F'}, default='V'}, {name='charoption', default="C", invisible=true}}, cname("conv3Dcmul"), {{name=Tensor, default=true, returned=true}, {name=real, default=0, invisible=true}, {name=real, default=1, invisible=true}, {name=Tensor, dim=4}, {name=Tensor, dim=4}, {name=real, default=1, invisible=true}, {name=real, default=1, invisible=true}, {name=real, default=1, invisible=true}, {name='charoption', values={'V', 'F'}, default='V'}, {name='charoption', default="C", invisible=true}}, cname("conv3Dmv"), {{name=Tensor, default=true, returned=true}, {name=real, default=0, invisible=true}, {name=real, default=1, invisible=true}, {name=Tensor, dim=4}, {name=Tensor, dim=5}, {name=real, default=1, invisible=true}, {name=real, default=1, invisible=true}, {name=real, default=1, invisible=true}, {name='charoption', values={'V', 'F'}, default='V'}, {name='charoption', default="C", invisible=true}} ) wrap("xcorr3", cname("conv3Dmul"), {{name=Tensor, default=true, returned=true}, {name=real, default=0, invisible=true}, {name=real, default=1, invisible=true}, {name=Tensor, dim=3}, {name=Tensor, dim=3}, {name=real, default=1, invisible=true}, {name=real, default=1, invisible=true}, {name=real, default=1, invisible=true}, {name='charoption', values={'V', 'F'}, default='V'}, {name='charoption', default="X", invisible=true}}, cname("conv3Dcmul"), {{name=Tensor, default=true, returned=true}, {name=real, default=0, invisible=true}, {name=real, default=1, invisible=true}, {name=Tensor, dim=4}, {name=Tensor, dim=4}, {name=real, default=1, invisible=true}, {name=real, default=1, invisible=true}, {name=real, default=1, invisible=true}, {name='charoption', values={'V', 'F'}, default='V'}, {name='charoption', default="X", invisible=true}}, cname("conv3Dmv"), {{name=Tensor, default=true, returned=true}, {name=real, default=0, invisible=true}, {name=real, default=1, invisible=true}, {name=Tensor, dim=4}, {name=Tensor, dim=5}, {name=real, 
default=1, invisible=true}, {name=real, default=1, invisible=true}, {name=real, default=1, invisible=true}, {name='charoption', values={'V', 'F'}, default='V'}, {name='charoption', default="X", invisible=true}} ) for _,name in pairs({'lt','gt','le','ge','eq','ne'}) do wrap(name, cname(name .. 'Value'), {{name='ByteTensor',default=true, returned=true}, {name=Tensor}, {name=real}}, cname(name .. 'ValueT'), {{name=Tensor, returned=true}, {name=Tensor}, {name=real}}, cname(name .. 'Tensor'), {{name='ByteTensor',default=true, returned=true}, {name=Tensor}, {name=Tensor}}, cname(name .. 'TensorT'), {{name=Tensor, returned=true}, {name=Tensor}, {name=Tensor}}) end wrap("nonzero", cname("nonzero"), {{name="IndexTensor", default=true, returned=true}, {name=Tensor}}) end -- ~= HalfTensor if Tensor == 'ByteTensor' then -- Logical accumulators only apply to ByteTensor for _,name in ipairs({'all', 'any'}) do wrap(name, cname('logical' .. name), {{name=Tensor}, {name="boolean", creturned=true}}) end end if Tensor == 'IntTensor' then wrap("abs", cname("abs"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}}, "abs", {{name=real}, {name=real, creturned=true}}) elseif Tensor == 'LongTensor' then wrap("abs", cname("abs"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}}, "labs", {{name=real}, {name=real, creturned=true}}) end if Tensor == 'FloatTensor' or Tensor == 'DoubleTensor' then wrap("mean", cname("meanall"), {{name=Tensor}, {name=accreal, creturned=true}}, cname("mean"), {{name=Tensor, default=true, returned=true}, {name=Tensor}, {name="index"}, {name="boolean", default=true, invisible=true}}) for _,name in ipairs({"var", "std"}) do wrap(name, cname(name .. 
"all"), {{name=Tensor}, {name="boolean", default=false}, {name=accreal, creturned=true} }, cname(name), {{name=Tensor, default=true, returned=true}, {name=Tensor}, {name="index"}, {name="boolean", default=false}, {name="boolean", default=true, invisible=true}}) end wrap("histc", cname("histc"), {{name=Tensor, default=true, returned=true}, {name=Tensor}, {name="long",default=100}, {name="double",default=0}, {name="double",default=0}}) wrap("bhistc", cname("bhistc"), {{name=Tensor, default=true, returned=true}, {name=Tensor}, {name="long",default=100}, {name="double",default=0}, {name="double",default=0}}) wrap("norm", cname("normall"), {{name=Tensor}, {name=real, default=2}, {name=accreal, creturned=true}}, cname("norm"), {{name=Tensor, default=true, returned=true}, {name=Tensor}, {name=real}, {name="index"}, {name="boolean", default=true, invisible=true}}) wrap("renorm", cname("renorm"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=real}, {name="index"}, {name=real}}) wrap("dist", cname("dist"), {{name=Tensor}, {name=Tensor}, {name=real, default=2}, {name=accreal, creturned=true}}) wrap("linspace", cname("linspace"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=real}, {name=real}, {name="long", default=100}}) wrap("logspace", cname("logspace"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=real}, {name=real}, {name="long", default=100}}) for _,name in ipairs({"log", "log1p", "exp", "cos", "acos", "cosh", "sin", "asin", "sinh", "tan", "atan", "tanh", "sqrt", "round", "ceil", "floor", "trunc", }) do wrap(name, cname(name), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}}, name, {{name=real}, {name=real, creturned=true}}) end wrap("abs", cname("abs"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}}, "fabs", {{name=real}, {name=real, 
creturned=true}}) wrap("frac", cname("frac"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}}, "TH_frac", {{name=real}, {name=real, creturned=true}}) wrap("rsqrt", cname("rsqrt"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}}, "TH_rsqrt", {{name=real}, {name=real, creturned=true}}) wrap("sigmoid", cname("sigmoid"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}}, "TH_sigmoid", {{name=real}, {name=real, creturned=true}}) wrap("neg", cname("neg"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}}) wrap("cinv", cname("cinv"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}}) wrap("lerp", cname("lerp"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=Tensor}, {name=real}}, "TH_lerp", {{name=real}, {name=real}, {name=real}, {name=real, creturned=true}}) wrap("atan2", cname("atan2"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=Tensor}}, "atan2", {{name=real}, {name=real}, {name=real, creturned=true}}) wrap("pow", cname("pow"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=Tensor, method={default=1}}, {name=real}}, cname("tpow"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name=real}, {name=Tensor, method={default=1}}}, "pow", {{name=real}, {name=real}, {name=real, creturned=true}}) wrap("rand", cname("rand"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name='Generator', default=true}, {name="LongArg"}}) wrap("randn", cname("randn"), {{name=Tensor, default=true, returned=true, method={default='nil'}}, {name='Generator', default=true}, {name="LongArg"}}) wrap("multinomial", 
cname("multinomial"), {{name="IndexTensor", default=true, returned=true, method={default='nil'}}, {name='Generator', default=true}, {name=Tensor}, {name="int"}, {name="boolean", default=false}}) wrap("multinomialAliasSetup_", cname("multinomialAliasSetup"), {{name=Tensor}, {name="IndexTensor", default=true, returned=true, method={default='nil'}}, {name=Tensor, default=true, returned=true, method={default='nil'}}}) wrap("multinomialAlias_", cname("multinomialAliasDraw"), {{name="IndexTensor", default=true, returned=true, method={default='nil'}}, {name='Generator', default=true}, {name="IndexTensor"}, {name=Tensor} }) for _,f in ipairs({{name='uniform', a=0, b=1}, {name='normal', a=0, b=1}, {name='cauchy', a=0, b=1}, {name='logNormal', a=1, b=2}}) do wrap(f.name, string.format("THRandom_%s", f.name), {{name='Generator', default=true}, {name="double", default=f.a}, {name="double", default=f.b}, {name="double", creturned=true}}, cname(f.name), {{name=Tensor, returned=true}, {name='Generator', default=true}, {name=real, default=f.a}, {name=real, default=f.b}}) end for _,f in ipairs({{name='exponential'}}) do wrap(f.name, string.format("THRandom_%s", f.name), {{name='Generator', default=true}, {name="double", default=f.a}, {name="double", creturned=true}}, cname(f.name), {{name=Tensor, returned=true}, {name='Generator', default=true}, {name=real, default=f.a}}) end for _,name in ipairs({"gesv","gels"}) do interface:wrap(name, cname(name), {{name=Tensor, returned=true}, {name=Tensor, returned=true}, {name=Tensor}, {name=Tensor}}, cname(name), {{name=Tensor, default=true, returned=true, invisible=true}, {name=Tensor, default=true, returned=true, invisible=true}, {name=Tensor}, {name=Tensor}} ) end interface:wrap("trtrs", cname("trtrs"), {{name=Tensor, returned=true}, {name=Tensor, returned=true}, {name=Tensor}, {name=Tensor}, {name='charoption', values={'U', 'L'}, default='U'}, -- uplo {name='charoption', values={'N', 'T'}, default='N'}, -- trans {name='charoption', 
values={'N', 'U'}, default='N'}}, -- diag cname("trtrs"), {{name=Tensor, default=true, returned=true, invisible=true}, {name=Tensor, default=true, returned=true, invisible=true}, {name=Tensor}, {name=Tensor}, {name='charoption', values={'U', 'L'}, default='U'}, -- uplo {name='charoption', values={'N', 'T'}, default='N'}, -- trans {name='charoption', values={'N', 'U'}, default='N'}} -- diag ) interface:wrap("symeig", cname("syev"), {{name=Tensor, returned=true}, {name=Tensor, returned=true}, {name=Tensor}, {name='charoption', values={'N', 'V'}, default='N'}, {name='charoption', values={'U', 'L'}, default='U'}}, cname("syev"), {{name=Tensor, default=true, returned=true, invisible=true}, {name=Tensor, default=true, returned=true, invisible=true}, {name=Tensor}, {name='charoption', values={'N', 'V'}, default='N'}, {name='charoption', values={'U', 'L'}, default='U'}} ) interface:wrap("eig", cname("geev"), {{name=Tensor, returned=true}, {name=Tensor, returned=true}, {name=Tensor}, {name='charoption', values={'N', 'V'}, default='N'}}, cname("geev"), {{name=Tensor, default=true, returned=true, invisible=true}, {name=Tensor, default=true, returned=true, invisible=true}, {name=Tensor}, {name='charoption', values={'N', 'V'}, default='N'}} ) interface:wrap("svd", cname("gesvd"), {{name=Tensor, returned=true}, {name=Tensor, returned=true}, {name=Tensor, returned=true}, {name=Tensor}, {name='charoption', values={'A', 'S'}, default='S'}}, cname("gesvd"), {{name=Tensor, default=true, returned=true, invisible=true}, {name=Tensor, default=true, returned=true, invisible=true}, {name=Tensor, default=true, returned=true, invisible=true}, {name=Tensor}, {name='charoption', values={'A', 'S'}, default='S'}} ) interface:wrap("inverse", cname("getri"), {{name=Tensor, returned=true}, {name=Tensor}}, cname("getri"), {{name=Tensor, default=true, returned=true, invisible=true}, {name=Tensor}} ) interface:wrap("potrf", cname("potrf"), {{name=Tensor, returned=true}, {name=Tensor}, 
{name='charoption', values={'U', 'L'}, default='U'}}, -- uplo cname("potrf"), {{name=Tensor, default=true, returned=true, invisible=true}, {name=Tensor}, {name='charoption', values={'U', 'L'}, default='U'}} ) interface:wrap("potrs", cname("potrs"), {{name=Tensor, returned=true}, {name=Tensor}, {name=Tensor}, {name='charoption', values={'U', 'L'}, default='U'}}, -- uplo cname("potrs"), {{name=Tensor, default=true, returned=true, invisible=true}, {name=Tensor}, {name=Tensor}, {name='charoption', values={'U', 'L'}, default='U'}} ) interface:wrap("potri", cname("potri"), {{name=Tensor, returned=true}, {name=Tensor}, {name='charoption', values={'U', 'L'}, default='U'}}, -- uplo cname("potri"), {{name=Tensor, default=true, returned=true, invisible=true}, {name=Tensor}, {name='charoption', values={'U', 'L'}, default='U'}} -- uplo ) interface:wrap("pstrf", cname("pstrf"), {{name=Tensor, returned=true}, {name='IntTensor', returned=true}, {name=Tensor}, {name='charoption', values={'U', 'L'}, default='U'}, -- uplo {name=real, default=-1}}, cname("pstrf"), {{name=Tensor, default=true, returned=true, invisible=true}, {name='IntTensor', default=true, returned=true, invisible=true}, {name=Tensor}, {name='charoption', values={'U', 'L'}, default='U'}, -- uplo {name=real, default=-1}} ) interface:wrap("qr", cname("qr"), {{name=Tensor, returned=true}, {name=Tensor, returned=true}, {name=Tensor}}, cname("qr"), {{name=Tensor, default=true, returned=true, invisible=true}, {name=Tensor, default=true, returned=true, invisible=true}, {name=Tensor}} ) interface:wrap("geqrf", cname("geqrf"), {{name=Tensor, returned=true}, {name=Tensor, returned=true}, {name=Tensor}}, cname("geqrf"), {{name=Tensor, default=true, returned=true, invisible=true}, {name=Tensor, default=true, returned=true, invisible=true}, {name=Tensor}} ) interface:wrap("orgqr", cname("orgqr"), {{name=Tensor, returned=true}, {name=Tensor}, {name=Tensor}}, cname("orgqr"), {{name=Tensor, default=true, returned=true, 
invisible=true}, {name=Tensor}, {name=Tensor}} ) interface:wrap("ormqr", cname("ormqr"), {{name=Tensor, returned=true}, {name=Tensor}, {name=Tensor}, {name=Tensor}, {name='charoption', values={'L', 'R'}, default='L'}, {name='charoption', values={'N', 'T'}, default='N'}}, cname("ormqr"), {{name=Tensor, default=true, returned=true, invisible=true}, {name=Tensor}, {name=Tensor}, {name=Tensor}, {name='charoption', values={'L', 'R'}, default='L'}, {name='charoption', values={'N', 'T'}, default='N'}} ) end method:register(string.format("m_torch_%sMath__", Tensor)) interface:print(method:tostring()) method:clearhistory() interface:register(string.format("torch_%sMath__", Tensor)) interface:print(string.gsub([[ static void torch_TensorMath_init(lua_State *L) { luaT_pushmetatable(L, "torch.Tensor"); /* register methods */ luaT_setfuncs(L, m_torch_TensorMath__, 0); /* register functions into the "torch" field of the tensor metaclass */ lua_pushstring(L, "torch"); lua_newtable(L); luaT_setfuncs(L, torch_TensorMath__, 0); lua_rawset(L, -3); lua_pop(L, 1); } ]], 'Tensor', Tensor)) end interface:dispatchregister("torch_TensorMath__") interface:print([[ void torch_TensorMath_init(lua_State *L) { torch_ByteTensorMath_init(L); torch_CharTensorMath_init(L); torch_ShortTensorMath_init(L); torch_IntTensorMath_init(L); torch_LongTensorMath_init(L); torch_FloatTensorMath_init(L); torch_DoubleTensorMath_init(L); luaT_setfuncs(L, torch_TensorMath__, 0); } ]]) if arg[1] then interface:tofile(arg[1]) else print(interface:tostring()) end TensorOperator.c000066400000000000000000000004441316246254300142200ustar00rootroot00000000000000#include "general.h" #define torch_TensorOperator_(NAME) TH_CONCAT_4(torch_,Real,TensorOperator_,NAME) #define torch_Tensor_id TH_CONCAT_3(torch_,Real,Tensor_id) #define torch_Tensor TH_CONCAT_STRING_3(torch.,Real,Tensor) #include "generic/TensorOperator.c" #include "THGenerateAllTypes.h" 
TestSuite.lua000066400000000000000000000012771316246254300135270ustar00rootroot00000000000000function torch.TestSuite() local obj = { __tests = {}, __isTestSuite = true } local metatable = {} function metatable:__index(key) return self.__tests[key] end function metatable:__newindex(key, value) if self.__tests[key] ~= nil then error("Test " .. tostring(key) .. " is already defined.") end if type(value) ~= "function" then if type(value) == "table" then error("Nested tables of tests are not supported") else error("Only functions are supported as members of a TestSuite") end end self.__tests[key] = value end setmetatable(obj, metatable) return obj end Tester.lua000066400000000000000000000673031316246254300130460ustar00rootroot00000000000000 -- Lua 5.2 compatibility local unpack = unpack or table.unpack local check = {} -- helper functions, defined at the bottom of the file local Tester = torch.class('torch.Tester') function Tester:__init() self.errors = {} self.tests = {} self.warnings = {} self._warningCount = {} self.disabledTests = {} self._currentTestName = '' -- To maintain backwards compatibility (at least for a short while), -- disable exact dimension checking of tensors when :assertTensorEq is -- called. Thus {{1}} == {1} when this flag is true. -- -- Note that other methods that suppose tensor checking (such as -- :assertGeneralEq) ignore this flag, since previously they didn't -- exist or support tensor equality checks at all, so there is no -- old code that uses these functions and relies on the behaviour. -- -- Note also that if the dimension check fails with this flag is true, then -- will show a warning. self._assertTensorEqIgnoresDims = true end function Tester:setEarlyAbort(earlyAbort) self.earlyAbort = earlyAbort end function Tester:setRethrowErrors(rethrow) self.rethrow = rethrow end function Tester:setSummaryOnly(summaryOnly) self.summaryOnly = summaryOnly end -- Add a success to the test. 
function Tester:_success() local name = self._currentTestName self.assertionPass[name] = self.assertionPass[name] + 1 return true end function Tester:_addDebugInfo(message) local ss = debug.traceback('tester', 3) or '' ss = ss:match('.-\n([^\n]+\n[^\n]+)\n[^\n]+xpcall') or '' local name = self._currentTestName return (name ~= '' and name .. '\n' or '') .. message .. '\n' .. ss end -- Add a failure to the test. function Tester:_failure(message) if self.rethrow then error(message, 2) end local name = self._currentTestName self.assertionFail[name] = self.assertionFail[name] + 1 self.errors[#self.errors + 1] = self:_addDebugInfo(message) return false end -- Add a warning to the test function Tester:_warning(message) local name = self._currentTestName self._warningCount[name] = (self._warningCount[name] or 0) + 1 self.warnings[#self.warnings + 1] = self:_addDebugInfo(message) end -- Call this during a test run with `condition = true` to log a success, or with -- `condition = false` to log a failure (using `message`). function Tester:_assert_sub(condition, message) if condition then return self:_success() else return self:_failure(message) end end local function getMessage(message, ...) assert(next{...} == nil, "Unexpected arguments passed to test function") if message then assert(type(message) == 'string', 'message parameter must be a string') if message ~= '' then return message .. '\n' end end return '' end --[[ Historically, some test functions have accepted both a message and a tolerance, and some just a message (e.g., assertTableEq). Now assertTableEq accepts both a tolerance and a message, so allow the two arguments to be passed in either order to maintain backwards compatibility (and more generally, for convenience). (We still document the ordering as "tolerance, message" for clarity.) This function also sanitizes them (ensures they are non-nil, etc). ]] local function getToleranceAndMessage(defaultTolerance, ...) 
local args = {...} local message = nil local tolerance = nil for _, a in ipairs(args) do if type(a) == 'string' then if message then error("Unexpected string argument; already have message", a) end message = a .. '\n' elseif type(a) == 'number' then if tolerance then error("Unexpected number argument; already have tolerance", a) end tolerance = a assert(tolerance >= 0, "tolerance cannot be negative") else error("Unrecognized argument; should be a tolerance or message", a) end end message = message or '' tolerance = tolerance or defaultTolerance return tolerance, message end function Tester:assert(condition, ...) local message = getMessage(...) if type(condition) ~= 'boolean' then self:_warning(" :assert should only be used for boolean conditions. " .. "To check for non-nil variables, do this explicitly: " .. "Tester:assert(var ~= nil).") end return self:_assert_sub(condition, string.format('%sBOOL violation condition=%s', message, tostring(condition))) end function Tester:assertGeneralEq(got, expected, ...) return self:_eqOrNeq(got, expected, false, ...) end function Tester:eq(got, expected, ...) return self:assertGeneralEq(got, expected, ...) end function Tester:assertGeneralNe(got, unexpected, ...) return self:_eqOrNeq(got, unexpected, true, ...) end function Tester:ne(got, unexpected, ...) return self:assertGeneralNe(got, unexpected, ...) end function Tester:_eqOrNeq(got, expected, negate, ...) local tolerance, message = getToleranceAndMessage(0, ...) local success, subMessage = check.areEq(got, expected, tolerance, negate) subMessage = subMessage or '' return self:_assert_sub(success, message .. subMessage) end function Tester:assertlt(a, b, ...) local message = getMessage(...) return self:_assert_sub(a < b, string.format('%sLT failed: %s >= %s', message, tostring(a), tostring(b))) end function Tester:assertgt(a, b, ...) local message = getMessage(...) 
return self:_assert_sub(a > b, string.format('%sGT failed: %s <= %s', message, tostring(a), tostring(b))) end function Tester:assertle(a, b, ...) local message = getMessage(...) return self:_assert_sub(a <= b, string.format('%sLE failed: %s > %s', message, tostring(a), tostring(b))) end function Tester:assertge(a, b, ...) local message = getMessage(...) return self:_assert_sub(a >= b, string.format('%sGE failed: %s < %s', message, tostring(a), tostring(b))) end function Tester:assertalmosteq(a, b, ...) local tolerance, message = getToleranceAndMessage(1e-16, ...) local diff = math.abs(a - b) return self:_assert_sub( diff <= tolerance, string.format( '%sALMOST_EQ failed: %s ~= %s with tolerance=%s', message, tostring(a), tostring(b), tostring(tolerance))) end function Tester:asserteq(a, b, ...) local message = getMessage(...) return self:_assert_sub(a == b, string.format('%sEQ failed: %s ~= %s', message, tostring(a), tostring(b))) end function Tester:assertne(a, b, ...) local message = getMessage(...) if type(a) == type(b) and type(a) == 'table' or type(a) == 'userdata' then self:_warning(" :assertne should only be used to compare basic lua " .. "objects (numbers, booleans, etc). Consider using " .. "either :assertGeneralNe or :assert(a ~= b).") end return self:_assert_sub(a ~= b, string.format('%sNE failed: %s == %s', message, tostring(a), tostring(b))) end function Tester:assertTensorEq(ta, tb, ...) return self:_assertTensorEqOrNeq(ta, tb, false, ...) end function Tester:assertTensorNe(ta, tb, ...) return self:_assertTensorEqOrNeq(ta, tb, true, ...) end function Tester:_assertTensorEqOrNeq(ta, tb, negate, ...) assert(torch.isTensor(ta), "First argument should be a Tensor") assert(torch.isTensor(tb), "Second argument should be a Tensor") local tolerance, message = getToleranceAndMessage(0, ...) 
local success, subMessage = check.areTensorsEq(ta, tb, tolerance, negate, self._assertTensorEqIgnoresDims) subMessage = subMessage or '' if self._assertTensorEqIgnoresDims and (not negate) and success and not ta:isSameSizeAs(tb) then self:_warning("Tensors have the same content but different dimensions. " .. "For backwards compatibility, they are considered equal, " .. "but this may change in the future. Consider using :eq " .. "to check for equality instead.") end return self:_assert_sub(success, message .. subMessage) end function Tester:assertTableEq(ta, tb, ...) return self:_assertTableEqOrNeq(ta, tb, false, ...) end function Tester:assertTableNe(ta, tb, ...) return self:_assertTableEqOrNeq(ta, tb, true, ...) end function Tester:_assertTableEqOrNeq(ta, tb, negate, ...) assert(type(ta) == 'table', "First argument should be a Table") assert(type(tb) == 'table', "Second argument should be a Table") return self:_eqOrNeq(ta, tb, negate, ...) end function Tester:assertError(f, ...) return self:assertErrorObj(f, function() return true end, ...) end function Tester:assertNoError(f, ...) local message = getMessage(...) local status, err = pcall(f) return self:_assert_sub(status, string.format('%sERROR violation: err=%s', message, tostring(err))) end function Tester:assertErrorMsg(f, errmsg, ...) return self:assertErrorObj(f, function(err) return err == errmsg end, ...) end function Tester:assertErrorPattern(f, errPattern, ...) local function errcomp(err) return string.find(err, errPattern) ~= nil end return self:assertErrorObj(f, errcomp, ...) end function Tester:assertErrorObj(f, errcomp, ...) local message = getMessage(...) local status, err = pcall(f) return self:_assert_sub((not status) and errcomp(err), string.format('%sERROR violation: err=%s', message, tostring(err))) end function Tester:add(f, name) if type(f) == "table" then assert(name == nil, "Name parameter is forbidden for a table of tests, " .. 
"since its use is ambiguous") if f.__isTestSuite then f = f.__tests else self:_warning("Should use TestSuite rather than plain lua table") end for i, v in pairs(f) do -- We forbid nested tests because the "expected" behaviour when a named -- test is run in the case that the named test is in fact a table of -- tests is not supported. Similar issue with _setUp and _tearDown -- functions inside nested tests. assert(type(v) ~= 'table', "Nested sets of tests are not supported") self:add(v, i) end return self end assert(type(f) == 'function', "Only tables of functions and functions supported") if name == '_setUp' then assert(not self._setUp, "Only one set-up function allowed") self._setUp = f elseif name == '_tearDown' then assert(not self._tearDown, "Only one tear-down function allowed") self._tearDown = f else name = name or 'unknown' if self.tests[name] ~= nil then error('Test with name ' .. name .. ' already exists!') end self.tests[name] = f end return self end function Tester:disable(testNames) if type(testNames) == 'string' then testNames = {testNames} end assert(type(testNames) == 'table', "Expecting name or list for disable") for _, name in ipairs(testNames) do assert(self.tests[name], "Unrecognized test '" .. name .. "'") self.disabledTests[name] = true end return self end function Tester:run(testNames) local tests = self:_getTests(testNames) self.assertionPass = {} self.assertionFail = {} self.haveWarning = {} self.testError = {} for name in pairs(tests) do self.assertionPass[name] = 0 self.assertionFail[name] = 0 self.testError[name] = 0 self._warningCount[name] = 0 end self:_run(tests) self:_report(tests) -- Throws an error on test failure/error, so that test script returns -- with nonzero return value. for name in pairs(tests) do assert(self.assertionFail[name] == 0, 'An error was found while running tests!') assert(self.testError[name] == 0, 'An error was found while running tests!') end return 0 end local function pluralize(num, str) local stem = num .. 
' ' .. str if num == 1 then return stem else return stem .. 's' end end local NCOLS = 80 local coloured local enable_colors, c = pcall(require, 'sys.colors') if arg and enable_colors then -- have we been invoked from the commandline? coloured = function(str, colour) return colour .. str .. c.none end else c = {} coloured = function(str) return str end end function Tester:_run(tests) local ntests = 0 for _ in pairs(tests) do ntests = ntests + 1 end local ntestsAsString = string.format('%u', ntests) local cfmt = string.format('%%%uu/%u ', ntestsAsString:len(), ntestsAsString) local cfmtlen = ntestsAsString:len() * 2 + 2 local function bracket(str) return '[' .. str .. ']' end io.write('Running ' .. pluralize(ntests, 'test') .. '\n') local i = 1 for name, fn in pairs(tests) do self._currentTestName = name -- TODO: compute max length of name and cut it down to size if needed local strinit = coloured(string.format(cfmt, i), c.cyan) .. self._currentTestName .. ' ' .. string.rep('.', NCOLS - 6 - 2 - cfmtlen - self._currentTestName:len()) .. ' ' io.write(strinit .. bracket(coloured('WAIT', c.cyan))) io.flush() local status, message, pass, skip if self.disabledTests[name] then skip = true else skip = false if self._setUp then self._setUp(name) end if self.rethrow then status = true local nerr = #self.errors message = fn() pass = nerr == #self.errors else status, message, pass = self:_pcall(fn) end if self._tearDown then self._tearDown(name) end end io.write('\r') io.write(strinit) if skip then io.write(bracket(coloured('SKIP', c.yellow))) elseif not status then self.testError[name] = 1 io.write(bracket(coloured('ERROR', c.magenta))) elseif not pass then io.write(bracket(coloured('FAIL', c.red))) else io.write(bracket(coloured('PASS', c.green))) if self._warningCount[name] > 0 then io.write('\n' .. 
string.rep(' ', NCOLS - 10)) io.write(bracket(coloured('+warning', c.yellow))) end end io.write('\n') io.flush() if self.earlyAbort and (i < ntests) and (not status or not pass) and (not skip) then io.write('Aborting on first error, not all tests have been executed\n') break end i = i + 1 collectgarbage() end end function Tester:_pcall(f) local nerr = #self.errors local stat, result = xpcall(f, debug.traceback) if not stat then self.errors[#self.errors + 1] = self._currentTestName .. '\n Function call failed\n' .. result .. '\n' end return stat, result, stat and (nerr == #self.errors) end function Tester:_getTests(testNames) if testNames == nil then return self.tests end if type(testNames) == 'string' then testNames = {testNames} end assert(type(testNames) == 'table', "Only accept a name or table of test names (or nil for all tests)") local function getMatchingNames(pattern) local matchingNames = {} for name in pairs(self.tests) do if string.match(name, pattern) then table.insert(matchingNames, name) end end return matchingNames end local tests = {} for _, pattern in ipairs(testNames) do local matchingNames = getMatchingNames(pattern) assert(#matchingNames > 0, "Couldn't find test '" .. pattern .. "'") for _, name in ipairs(matchingNames) do tests[name] = self.tests[name] end end return tests end function Tester:_report(tests) local ntests = 0 local nfailures = 0 local nerrors = 0 local nskipped = 0 local nwarnings = 0 self.countasserts = 0 for name in pairs(tests) do ntests = ntests + 1 self.countasserts = self.countasserts + self.assertionFail[name] + self.assertionPass[name] if self.assertionFail[name] > 0 then nfailures = nfailures + 1 end if self.testError[name] > 0 then nerrors = nerrors + 1 end if self._warningCount[name] > 0 then nwarnings = nwarnings + 1 end if self.disabledTests[name] then nskipped = nskipped + 1 end end if self._warningCount[''] then nwarnings = nwarnings + self._warningCount[''] end io.write('Completed ' .. 
pluralize(self.countasserts, 'assert')) io.write(' in ' .. pluralize(ntests, 'test') .. ' with ') io.write(coloured(pluralize(nfailures, 'failure'), nfailures == 0 and c.green or c.red)) io.write(' and ') io.write(coloured(pluralize(nerrors, 'error'), nerrors == 0 and c.green or c.magenta)) if nwarnings > 0 then io.write(' and ') io.write(coloured(pluralize(nwarnings, 'warning'), c.yellow)) end if nskipped > 0 then io.write(' and ') io.write(coloured(nskipped .. ' disabled', c.yellow)) end io.write('\n') -- Prints off a message separated by ----- local haveSection = false local function addSection(text) local function printDashes() io.write(string.rep('-', NCOLS) .. '\n') end if not haveSection then printDashes() haveSection = true end io.write(text .. '\n') printDashes() end if not self.summaryOnly then for _, v in ipairs(self.errors) do addSection(v) end for _, v in ipairs(self.warnings) do addSection(v) end end end --[[ Tests for tensor equality between two tensors of matching sizes and types. Tests whether the maximum element-wise difference between `ta` and `tb` is less than or equal to `tolerance`. Arguments: * `ta` (tensor) * `tb` (tensor) * `tolerance` (number) maximum elementwise difference between `ta` and `tb`. * `negate` (boolean) if true, we invert success and failure. * `storage` (boolean) if true, we print an error message referring to Storages rather than Tensors. Returns: 1. success, boolean that indicates success 2. 
failure_message, string or nil ]] function check.areSameFormatTensorsEq(ta, tb, tolerance, negate, storage) local function ensureHasAbs(t) -- Byte, Char and Short Tensors don't have abs return t.abs and t or t:double() end ta = ensureHasAbs(ta) tb = ensureHasAbs(tb) local diff = ta:clone():add(-1, tb):abs() local err = diff:max() local success = err <= tolerance if negate then success = not success end local errMessage if not success then local prefix = storage and 'Storage' or 'Tensor' local violation = negate and 'NE(==)' or 'EQ(==)' errMessage = string.format('%s%s violation: max diff=%s, tolerance=%s', prefix, violation, tostring(err), tostring(tolerance)) end return success, errMessage end --[[ Tests for tensor equality. Tests whether the maximum element-wise difference between `ta` and `tb` is less than or equal to `tolerance`. Arguments: * `ta` (tensor) * `tb` (tensor) * `tolerance` (number) maximum elementwise difference between `ta` and `tb`. * `negate` (boolean) if negate is true, we invert success and failure. * `ignoreTensorDims` (boolean, default false) if true, then tensors of the same size but different dimensions can still be considered equal, e.g., {{1}} == {1}. For backwards compatibility. Returns: 1. success, boolean that indicates success 2. failure_message, string or nil ]] function check.areTensorsEq(ta, tb, tolerance, negate, ignoreTensorDims) ignoreTensorDims = ignoreTensorDims or false if not ignoreTensorDims and ta:dim() ~= tb:dim() then return negate, 'The tensors have different dimensions' end if ta:type() ~= tb:type() then return negate, 'The tensors have different types' end -- If we are comparing two empty tensors, return true. -- This is needed because some functions below cannot be applied to tensors -- of dimension 0. 
if ta:dim() == 0 and tb:dim() == 0 then return not negate, 'Both tensors are empty' end local sameSize if ignoreTensorDims then sameSize = ta:nElement() == tb:nElement() else sameSize = ta:isSameSizeAs(tb) end if not sameSize then return negate, 'The tensors have different sizes' end return check.areSameFormatTensorsEq(ta, tb, tolerance, negate, false) end local typesMatching = { ['torch.ByteStorage'] = torch.ByteTensor, ['torch.CharStorage'] = torch.CharTensor, ['torch.ShortStorage'] = torch.ShortTensor, ['torch.IntStorage'] = torch.IntTensor, ['torch.LongStorage'] = torch.LongTensor, ['torch.FloatStorage'] = torch.FloatTensor, ['torch.DoubleStorage'] = torch.DoubleTensor, ['torch.HalfStorage'] = torch.HalfTensor, } --[[ Tests for storage equality. Tests whether the maximum element-wise difference between `sa` and `sb` is less than or equal to `tolerance`. Arguments: * `sa` (storage) * `sb` (storage) * `tolerance` (number) maximum elementwise difference between `a` and `b`. * `negate` (boolean) if negate is true, we invert success and failure. Returns: 1. success, boolean that indicates success 2. failure_message, string or nil ]] function check.areStoragesEq(sa, sb, tolerance, negate) if sa:size() ~= sb:size() then return negate, 'The storages have different sizes' end local typeOfsa = torch.type(sa) local typeOfsb = torch.type(sb) if typeOfsa ~= typeOfsb then return negate, 'The storages have different types' end local ta = typesMatching[typeOfsa](sa) local tb = typesMatching[typeOfsb](sb) return check.areSameFormatTensorsEq(ta, tb, tolerance, negate, true) end --[[ Tests for general (deep) equality. The types of `got` and `expected` must match. Tables are compared recursively. Keys and types of the associated values must match, recursively. Numbers are compared with the given tolerance. Torch tensors and storages are compared with the given tolerance on their elementwise difference. Other types are compared for strict equality with the regular Lua == operator. 
Arguments: * `got` * `expected` * `tolerance` (number) maximum elementwise difference between `a` and `b`. * `negate` (boolean) if negate is true, we invert success and failure. Returns: 1. success, boolean that indicates success 2. failure_message, string or nil ]] function check.areEq(got, expected, tolerance, negate) local errMessage if type(got) ~= type(expected) then if not negate then errMessage = 'EQ failed: values have different types (first: ' .. type(got) .. ', second: ' .. type(expected) .. ')' end return negate, errMessage elseif type(got) == 'number' then local diff = math.abs(got - expected) local ok = (diff <= tolerance) if negate then ok = not ok end if not ok then if negate then errMessage = string.format("NE failed: %s == %s", tostring(got), tostring(expected)) else errMessage = string.format("EQ failed: %s ~= %s", tostring(got), tostring(expected)) end if tolerance > 0 then errMessage = errMessage .. " with tolerance=" .. tostring(tolerance) end end return ok, errMessage elseif type(expected) == "table" then return check.areTablesEq(got, expected, tolerance, negate) elseif torch.isTensor(got) then return check.areTensorsEq(got, expected, tolerance, negate) elseif torch.isStorage(got) then return check.areStoragesEq(got, expected, tolerance, negate) else -- Below: we have the same type which is either userdata or a lua type -- which is not a number. local ok = (got == expected) if negate then ok = not ok end if not ok then if negate then errMessage = string.format("NE failed: %s (%s) == %s (%s)", tostring(got), type(got), tostring(expected), type(expected)) else errMessage = string.format("EQ failed: %s (%s) ~= %s (%s)", tostring(got), type(got), tostring(expected), type(expected)) end end return ok, errMessage end end --[[ Tests for (deep) table equality. Tables are compared recursively. Keys and types of the associated values must match, recursively. Numbers are compared with the given tolerance. 
Torch tensors and storages are compared with the given tolerance on their elementwise difference. Other types are compared for strict equality with the regular Lua == operator. Arguments: * `t1` (table) * `t2` (table) * `tolerance` (number) maximum elementwise difference between `a` and `b`. * `negate` (boolean) if negate is true, we invert success and failure. Returns: 1. success, boolean that indicates success 2. failure_message, string or nil ]] function check.areTablesEq(t1, t2, tolerance, negate) -- Implementation detail: Instead of doing a depth-first table comparison -- check (for example, using recursion), let's do a breadth-first search -- using a queue. Why? Because if we have two tables that are quite deep -- (e.g., a gModule from nngraph), then if they are different then it's -- more useful to the user to show how they differ at as-shallow-a-depth -- as possible. local queue = {} queue._head = 1 queue._tail = 1 function queue.isEmpty() return queue._tail == queue._head end function queue.pop() queue._head = queue._head + 1 return queue[queue._head - 1] end function queue.push(value) queue[queue._tail] = value queue._tail = queue._tail + 1 end queue.push({t1, t2}) while not queue.isEmpty() do local location t1, t2, location = unpack(queue.pop()) local function toSublocation(key) local keyAsString = tostring(key) return (location and location .. "." .. keyAsString) or keyAsString end for key, value1 in pairs(t1) do local sublocation = toSublocation(key) if t2[key] == nil then return negate, string.format( "Entry %s missing in second table (is %s in first)", sublocation, tostring(value1)) end local value2 = t2[key] if type(value1) == 'table' and type(value2) == 'table' then queue.push({value1, value2, sublocation}) else local ok, message = check.areEq(value1, value2, tolerance, false) if not ok then message = 'At table location ' .. sublocation .. ': ' .. 
message return negate, message end end end for key, value2 in pairs(t2) do local sublocation = toSublocation(key) if t1[key] == nil then return negate, string.format( "Entry %s missing in first table (is %s in second)", sublocation, tostring(value2)) end end end return not negate, 'The tables are equal' end Timer.c000066400000000000000000000111741316246254300123140ustar00rootroot00000000000000#include "general.h" #ifdef _WIN32 #include #include #define TimeType __int64 static __declspec( thread ) TimeType ticksPerSecond = 0; /* * There is an example of getrusage for windows in following link: * https://github.com/openvswitch/ovs/blob/master/lib/getrusage-windows.c */ #else #include #include #define TimeType double #endif typedef struct _Timer { int isRunning; TimeType totalrealtime; TimeType totalusertime; TimeType totalsystime; TimeType startrealtime; TimeType startusertime; TimeType startsystime; } Timer; static TimeType torch_Timer_realtime() { #ifdef _WIN32 TimeType current; QueryPerformanceCounter(¤t); return current; #else struct timeval current; gettimeofday(¤t, NULL); return (current.tv_sec + current.tv_usec/1000000.0); #endif } static TimeType torch_Timer_usertime() { #ifdef _WIN32 return torch_Timer_realtime(); #else struct rusage current; getrusage(RUSAGE_SELF, ¤t); return (current.ru_utime.tv_sec + current.ru_utime.tv_usec/1000000.0); #endif } static TimeType torch_Timer_systime() { #ifdef _WIN32 return 0; #else struct rusage current; getrusage(RUSAGE_SELF, ¤t); return (current.ru_stime.tv_sec + current.ru_stime.tv_usec/1000000.0); #endif } static int torch_Timer_new(lua_State *L) { #ifdef _WIN32 if (ticksPerSecond == 0) { assert(sizeof(LARGE_INTEGER) == sizeof(__int64)); QueryPerformanceFrequency(&ticksPerSecond); } #endif Timer *timer = luaT_alloc(L, sizeof(Timer)); timer->isRunning = 1; timer->totalrealtime = 0; timer->totalusertime = 0; timer->totalsystime = 0; timer->startrealtime = torch_Timer_realtime(); timer->startusertime = 
torch_Timer_usertime(); timer->startsystime = torch_Timer_systime(); luaT_pushudata(L, timer, "torch.Timer"); return 1; } static int torch_Timer_reset(lua_State *L) { Timer *timer = luaT_checkudata(L, 1, "torch.Timer"); timer->totalrealtime = 0; timer->totalusertime = 0; timer->totalsystime = 0; timer->startrealtime = torch_Timer_realtime(); timer->startusertime = torch_Timer_usertime(); timer->startsystime = torch_Timer_systime(); lua_settop(L, 1); return 1; } static int torch_Timer_free(lua_State *L) { Timer *timer = luaT_checkudata(L, 1, "torch.Timer"); luaT_free(L, timer); return 0; } static int torch_Timer_stop(lua_State *L) { Timer *timer = luaT_checkudata(L, 1, "torch.Timer"); if(timer->isRunning) { TimeType realtime = torch_Timer_realtime() - timer->startrealtime; TimeType usertime = torch_Timer_usertime() - timer->startusertime; TimeType systime = torch_Timer_systime() - timer->startsystime; timer->totalrealtime += realtime; timer->totalusertime += usertime; timer->totalsystime += systime; timer->isRunning = 0; } lua_settop(L, 1); return 1; } static int torch_Timer_resume(lua_State *L) { Timer *timer = luaT_checkudata(L, 1, "torch.Timer"); if(!timer->isRunning) { timer->isRunning = 1; timer->startrealtime = torch_Timer_realtime(); timer->startusertime = torch_Timer_usertime(); timer->startsystime = torch_Timer_systime(); } lua_settop(L, 1); return 1; } static int torch_Timer_time(lua_State *L) { Timer *timer = luaT_checkudata(L, 1, "torch.Timer"); double realtime = (timer->isRunning ? (timer->totalrealtime + torch_Timer_realtime() - timer->startrealtime) : timer->totalrealtime); double usertime = (timer->isRunning ? (timer->totalusertime + torch_Timer_usertime() - timer->startusertime) : timer->totalusertime); double systime = (timer->isRunning ? 
(timer->totalsystime + torch_Timer_systime() - timer->startsystime) : timer->totalsystime); #ifdef _WIN32 realtime /= ticksPerSecond; usertime /= ticksPerSecond; systime /= ticksPerSecond; #endif lua_createtable(L, 0, 3); lua_pushnumber(L, realtime); lua_setfield(L, -2, "real"); lua_pushnumber(L, usertime); lua_setfield(L, -2, "user"); lua_pushnumber(L, systime); lua_setfield(L, -2, "sys"); return 1; } static int torch_Timer___tostring__(lua_State *L) { Timer *timer = luaT_checkudata(L, 1, "torch.Timer"); lua_pushfstring(L, "torch.Timer [status: %s]", (timer->isRunning ? "running" : "stopped")); return 1; } static const struct luaL_Reg torch_Timer__ [] = { {"reset", torch_Timer_reset}, {"stop", torch_Timer_stop}, {"resume", torch_Timer_resume}, {"time", torch_Timer_time}, {"__tostring__", torch_Timer___tostring__}, {NULL, NULL} }; void torch_Timer_init(lua_State *L) { luaT_newmetatable(L, "torch.Timer", NULL, torch_Timer_new, torch_Timer_free, NULL); luaT_setfuncs(L, torch_Timer__, 0); lua_pop(L, 1); } cmake/000077500000000000000000000000001316246254300121445ustar00rootroot00000000000000cmake/TorchConfig.cmake.in000066400000000000000000000026521316246254300157650ustar00rootroot00000000000000# This (ugly) setup assumes: # CMAKE_PREFIX_PATH = LUA_BINDIR # CMAKE_INSTALL_PREFIX = PREFIX # Define Torch basic subpaths SET(Torch_INSTALL_PREFIX "@Torch_INSTALL_PREFIX@") SET(Torch_INSTALL_BIN_SUBDIR "@Torch_INSTALL_BIN_SUBDIR@") SET(Torch_INSTALL_MAN_SUBDIR "@Torch_INSTALL_MAN_SUBDIR@") SET(Torch_INSTALL_LIB_SUBDIR "@Torch_INSTALL_LIB_SUBDIR@") SET(Torch_INSTALL_SHARE_SUBDIR "@Torch_INSTALL_SHARE_SUBDIR@") SET(Torch_INSTALL_INCLUDE_SUBDIR "@Torch_INSTALL_INCLUDE_SUBDIR@") SET(Torch_INSTALL_CMAKE_SUBDIR "@Torch_INSTALL_CMAKE_SUBDIR@") SET(Torch_INSTALL_LUA_PATH_SUBDIR "@Torch_INSTALL_LUA_PATH_SUBDIR@") SET(Torch_INSTALL_LUA_CPATH_SUBDIR "@Torch_INSTALL_LUA_CPATH_SUBDIR@") SET(Torch_INSTALL_CMAKE_RIDBUS "@Torch_INSTALL_CMAKE_RIDBUS@") FILE(RELATIVE_PATH 
Torch_INSTALL_LUA_PATH_SUBDIR "${Torch_INSTALL_PREFIX}" "${CMAKE_INSTALL_PREFIX}/lua") FILE(RELATIVE_PATH Torch_INSTALL_LUA_CPATH_SUBDIR "${Torch_INSTALL_PREFIX}" "${CMAKE_INSTALL_PREFIX}/lib") SET(CMAKE_MODULE_PATH "${Torch_INSTALL_PREFIX}/${Torch_INSTALL_CMAKE_SUBDIR}" "${CMAKE_MODULE_PATH}") SET(CMAKE_INSTALL_PREFIX "${Torch_INSTALL_PREFIX}") # override INCLUDE(TorchPathsInit) INCLUDE(TorchPackage) INCLUDE(TorchWrap) # Define Torch basic targets INCLUDE(TorchExports) INCLUDE_DIRECTORIES("${Torch_INSTALL_INCLUDE}") INCLUDE_DIRECTORIES("${Torch_INSTALL_INCLUDE}/TH") LINK_DIRECTORIES("${Torch_INSTALL_LIB}") MESSAGE(STATUS "Found Torch7 in ${Torch_INSTALL_PREFIX}") cmake/TorchExports.cmake000066400000000000000000000010771316246254300156170ustar00rootroot00000000000000INSTALL(EXPORT TH-exports DESTINATION "${Torch_INSTALL_CMAKE_SUBDIR}" FILE "TorchExports.cmake") CONFIGURE_FILE("cmake/TorchConfig.cmake.in" "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/TorchConfig.cmake" @ONLY) CONFIGURE_FILE("cmake/TorchWrap.cmake.in" "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/TorchWrap.cmake" @ONLY) INSTALL( FILES "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/TorchConfig.cmake" "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/TorchWrap.cmake" "cmake/TorchPathsInit.cmake" "cmake/TorchPackage.cmake" DESTINATION "${Torch_INSTALL_CMAKE_SUBDIR}") cmake/TorchPackage.cmake000066400000000000000000000035511316246254300155050ustar00rootroot00000000000000# -*- cmake -*- MACRO(ADD_TORCH_LIBRARY package type src) IF ("${type}" STREQUAL "STATIC") if ("${src}" MATCHES "cu$" OR "${src}" MATCHES "cu;") CUDA_ADD_LIBRARY(${package} STATIC ${src}) else() ADD_LIBRARY(${package} STATIC ${src}) endif() ELSE() if ("${src}" MATCHES "cu$" OR "${src}" MATCHES "cu;") CUDA_ADD_LIBRARY(${package} ${type} ${src}) else() ADD_LIBRARY(${package} ${type} ${src}) endif() ENDIF() ENDMACRO() MACRO(ADD_TORCH_PACKAGE package src luasrc) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${Torch_LUA_INCLUDE_DIR}) ### 
C/C++ sources # As per CMake doc, macro arguments are not variables, so simple test syntax not working IF(NOT "${src}" STREQUAL "") ADD_TORCH_LIBRARY(${package} MODULE "${src}") ### Torch packages supposes libraries prefix is "lib" SET_TARGET_PROPERTIES(${package} PROPERTIES PREFIX "lib" IMPORT_PREFIX "lib" INSTALL_NAME_DIR "@executable_path/${Torch_INSTALL_BIN2CPATH}") IF(APPLE) SET_TARGET_PROPERTIES(${package} PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") ENDIF() IF (BUILD_STATIC OR "$ENV{STATIC_TH}" STREQUAL "YES") ADD_TORCH_LIBRARY(${package}_static STATIC "${src}") SET_TARGET_PROPERTIES(${package}_static PROPERTIES COMPILE_FLAGS "-fPIC") SET_TARGET_PROPERTIES(${package}_static PROPERTIES PREFIX "lib" IMPORT_PREFIX "lib" OUTPUT_NAME "${package}") ENDIF() INSTALL(TARGETS ${package} RUNTIME DESTINATION ${Torch_INSTALL_LUA_CPATH_SUBDIR} LIBRARY DESTINATION ${Torch_INSTALL_LUA_CPATH_SUBDIR}) ENDIF(NOT "${src}" STREQUAL "") ### lua sources IF(NOT "${luasrc}" STREQUAL "") INSTALL(FILES ${luasrc} DESTINATION ${Torch_INSTALL_LUA_PATH_SUBDIR}/${package}) ENDIF(NOT "${luasrc}" STREQUAL "") ENDMACRO(ADD_TORCH_PACKAGE) cmake/TorchPaths.cmake000066400000000000000000000025071316246254300152310ustar00rootroot00000000000000# workaround another annoying cmake bug # http://public.kitware.com/Bug/view.php?id=14462 # https://awesome.naquadah.org/bugs/index.php?do=details&task_id=869 MACRO(NORMALIZE_PATH _path_) get_filename_component(${_path_}_abs "${${_path_}}" ABSOLUTE) SET(${_path_} "${${_path_}_abs}") ENDMACRO() NORMALIZE_PATH(LUA_BINDIR) NORMALIZE_PATH(LUA_LIBDIR) NORMALIZE_PATH(LUA_INCDIR) NORMALIZE_PATH(LUADIR) NORMALIZE_PATH(LIBDIR) GET_FILENAME_COMPONENT(CMAKE_INSTALL_PREFIX "${LUA_BINDIR}" PATH) SET(Torch_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}) FILE(RELATIVE_PATH Torch_INSTALL_BIN_SUBDIR "${CMAKE_INSTALL_PREFIX}" "${LUA_BINDIR}") FILE(RELATIVE_PATH Torch_INSTALL_LIB_SUBDIR "${CMAKE_INSTALL_PREFIX}" "${LUA_LIBDIR}") FILE(RELATIVE_PATH Torch_INSTALL_INCLUDE_SUBDIR 
"${CMAKE_INSTALL_PREFIX}" "${LUA_INCDIR}") SET(Torch_INSTALL_MAN_SUBDIR "share/man" CACHE PATH "Install dir for man pages (relative to Torch_INSTALL_PREFIX)") SET(Torch_INSTALL_SHARE_SUBDIR "share" CACHE PATH "Install dir for data (relative to Torch_INSTALL_PREFIX)") SET(Torch_INSTALL_CMAKE_SUBDIR "share/cmake/torch" CACHE PATH "Install dir for .cmake files (relative to Torch_INSTALL_PREFIX)") FILE(RELATIVE_PATH Torch_INSTALL_LUA_PATH_SUBDIR "${CMAKE_INSTALL_PREFIX}" "${LUADIR}") FILE(RELATIVE_PATH Torch_INSTALL_LUA_CPATH_SUBDIR "${CMAKE_INSTALL_PREFIX}" "${LIBDIR}") cmake/TorchPathsInit.cmake000066400000000000000000000041601316246254300160520ustar00rootroot00000000000000SET(Torch_INSTALL_BIN "${Torch_INSTALL_PREFIX}/${Torch_INSTALL_BIN_SUBDIR}") SET(Torch_INSTALL_MAN "${Torch_INSTALL_PREFIX}/${Torch_INSTALL_MAN_SUBDIR}") SET(Torch_INSTALL_LIB "${Torch_INSTALL_PREFIX}/${Torch_INSTALL_LIB_SUBDIR}") SET(Torch_INSTALL_SHARE "${Torch_INSTALL_PREFIX}/${Torch_INSTALL_SHARE_SUBDIR}") SET(Torch_INSTALL_INCLUDE "${Torch_INSTALL_PREFIX}/${Torch_INSTALL_INCLUDE_SUBDIR}") #SET(Torch_INSTALL_DOK "${Torch_INSTALL_PREFIX}/${Torch_INSTALL_DOK_SUBDIR}") #SET(Torch_INSTALL_HTML "${Torch_INSTALL_PREFIX}/${Torch_INSTALL_HTML_SUBDIR}") SET(Torch_INSTALL_CMAKE "${Torch_INSTALL_PREFIX}/${Torch_INSTALL_CMAKE_SUBDIR}") SET(Torch_INSTALL_LUA_PATH "${Torch_INSTALL_PREFIX}/${Torch_INSTALL_LUA_PATH_SUBDIR}") #SET(Torch_INSTALL_LUA_PKG_PATH "${Torch_INSTALL_PREFIX}/${Torch_INSTALL_LUA_PKG_PATH_SUBDIR}") SET(Torch_INSTALL_LUA_CPATH "${Torch_INSTALL_PREFIX}/${Torch_INSTALL_LUA_CPATH_SUBDIR}") #SET(Torch_INSTALL_LUAROCKS_SYSCONF "${Torch_INSTALL_PREFIX}/${Torch_INSTALL_LUAROCKS_SYSCONF_SUBDIR}") # reverse relative path to prefix (ridbus is the palindrom of subdir) FILE(RELATIVE_PATH Torch_INSTALL_BIN_RIDBUS "${Torch_INSTALL_BIN}" "${Torch_INSTALL_PREFIX}/.") FILE(RELATIVE_PATH Torch_INSTALL_CMAKE_RIDBUS "${Torch_INSTALL_CMAKE}" "${Torch_INSTALL_PREFIX}/.") 
GET_FILENAME_COMPONENT(Torch_INSTALL_BIN_RIDBUS "${Torch_INSTALL_BIN_RIDBUS}" PATH) GET_FILENAME_COMPONENT(Torch_INSTALL_CMAKE_RIDBUS "${Torch_INSTALL_CMAKE_RIDBUS}" PATH) IF(UNIX) OPTION(WITH_RPATH "Build libraries with executable rpaths" ON) IF(WITH_RPATH) SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) FILE(RELATIVE_PATH Torch_INSTALL_BIN2LIB "${Torch_INSTALL_BIN}" "${Torch_INSTALL_LIB}") IF(APPLE) SET(CMAKE_MACOSX_RPATH TRUE) # @rpath in libs SET(CMAKE_INSTALL_RPATH "@executable_path/${Torch_INSTALL_BIN2LIB}") # exec ELSE() SET(CMAKE_INSTALL_RPATH "\$ORIGIN/${Torch_INSTALL_BIN2LIB}") ENDIF() ELSE() SET(CMAKE_MACOSX_RPATH FALSE) # no @rpath in libs ENDIF() ENDIF(UNIX) IF (WIN32) SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") SET(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") ENDIF (WIN32) cmake/TorchWrap.cmake000066400000000000000000000014701316246254300150610ustar00rootroot00000000000000MACRO(ADD_TORCH_WRAP target luafile) INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}") GET_FILENAME_COMPONENT(_file_ "${luafile}" NAME_WE) SET(cfile "${_file_}.c") IF (DEFINED CWRAP_CUSTOM_LUA) ADD_CUSTOM_COMMAND( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${cfile}" COMMAND ${CWRAP_CUSTOM_LUA} ARGS "${CMAKE_CURRENT_SOURCE_DIR}/${luafile}" "${CMAKE_CURRENT_BINARY_DIR}/${cfile}" WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" DEPENDS "${luafile}") ELSE (DEFINED CWRAP_CUSTOM_LUA) ADD_CUSTOM_COMMAND( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${cfile}" COMMAND ${LUA} ARGS "${CMAKE_CURRENT_SOURCE_DIR}/${luafile}" "${CMAKE_CURRENT_BINARY_DIR}/${cfile}" WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" DEPENDS "${luafile}") ENDIF (DEFINED CWRAP_CUSTOM_LUA) ENDMACRO(ADD_TORCH_WRAP) cmake/TorchWrap.cmake.in000066400000000000000000000016051316246254300154660ustar00rootroot00000000000000MACRO(ADD_TORCH_WRAP target luafile) INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}") GET_FILENAME_COMPONENT(_file_ "${luafile}" NAME_WE) SET(cfile "${_file_}.c") IF (DEFINED CWRAP_CUSTOM_LUA) 
ADD_CUSTOM_COMMAND( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${cfile}" COMMAND ${CWRAP_CUSTOM_LUA} ARGS "${CMAKE_CURRENT_SOURCE_DIR}/${luafile}" "${CMAKE_CURRENT_BINARY_DIR}/${cfile}" WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" DEPENDS "${luafile}") ELSE (DEFINED CWRAP_CUSTOM_LUA) ADD_CUSTOM_COMMAND( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${cfile}" COMMAND @LUA@ ARGS "${CMAKE_CURRENT_SOURCE_DIR}/${luafile}" "${CMAKE_CURRENT_BINARY_DIR}/${cfile}" WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" DEPENDS "${luafile}") ENDIF (DEFINED CWRAP_CUSTOM_LUA) ADD_CUSTOM_TARGET(${target} DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/${cfile}") ENDMACRO(ADD_TORCH_WRAP) doc/000077500000000000000000000000001316246254300116315ustar00rootroot00000000000000doc/cmdline.md000066400000000000000000000075021316246254300135720ustar00rootroot00000000000000 # CmdLine # This class provides a parameter parsing framework which is very useful when one needs to run several experiments that rely on different parameter settings that are passed in the command line. This class will also override the default print function to direct all the output to a log file as well as screen at the same time. A sample `lua` file is given below that makes use of `CmdLine` class. ```lua cmd = torch.CmdLine() cmd:text() cmd:text() cmd:text('Training a simple network') cmd:text() cmd:text('Options') cmd:option('-seed',123,'initial random seed') cmd:option('-booloption',false,'boolean option') cmd:option('-stroption','mystring','string option') cmd:text() -- parse input params params = cmd:parse(arg) params.rundir = cmd:string('experiment', params, {dir=true}) paths.mkdir(params.rundir) -- create log file cmd:log(params.rundir .. 
'/log', params) ``` When this file is run on the th command line as follows ```shell # th myscript.lua ``` It will produce the following output: ``` [program started on Tue Jan 10 15:33:49 2012] [command line arguments] booloption false seed 123 rundir experiment stroption mystring [----------------------] booloption false seed 123 rundir experiment stroption mystring ``` The same output will also be written to file `experiment/log`. Whenever one of the options are passed on the command line and is different than the default value, the `rundir` is name is produced to reflect the parameter setting. ```shell # th myscript.lua -seed 456 -stroption mycustomstring ``` This will produce the following output: ``` [program started on Tue Jan 10 15:36:55 2012] [command line arguments] booloption false seed 456 rundir experiment,seed=456,stroption=mycustomstring stroption mycustomstring [----------------------] booloption false seed 456 rundir experiment,seed=456,stroption=mycustomstring stroption mycustomstring ``` and the output will be logged in `experiment,seed=456,stroption=mycustomstring/log` ### addTime([name] [,format]) ### Adds a prefix to every line in the log file with the date/time in the given format with an optional name argument. The date/time format is the same as `os.date()`. Note that the prefix is only added to the log file, not the screen output. The default value for name is empty and the default format is '%F %T'. The final produced output for the following command is: ```lua > cmd:addTime('your project name','%F %T') > print('Your log message') ``` ``` 2012-02-07 08:21:56[your project name]: Your log message ``` ### log(filename, parameter_table) ### It sets the log filename to `filename` and prints the values of parameters in the `parameter_table`. If filename is an open file descriptor, it will write to the file instead of creating a new one. ### option(name, default, help) ### Stores an option argument. The name should always start with '-'. 
### [table] parse(arg) ### Parses a given table, `arg` is by default the argument table that is created by `lua` using the command line arguments passed to the executable. Returns a table of option values. ### silent() ### Silences the output to standard output. The only output is written to the log file. ### [string] string(prefix, params, ignore) ### Returns a string representation of the options by concatenating the non-default options. `ignore` is a table `{dir=true}`, which will ensure that option named `dir` will be ignored while creating the string representation. This function is useful for creating unique experiment directories that depend on the parameter settings. ### text(string) ### Logs a custom text message. doc/diskfile.md000066400000000000000000000053101316246254300137440ustar00rootroot00000000000000 # DiskFile # Parent classes: [File](file.md) A `DiskFile` is a particular `File` which is able to perform basic read/write operations on a file stored on disk. It implements all methods described in [File](file.md), and some additional methods relative to _endian_ encoding. By default, a `DiskFile` is in [ASCII](file.md#torch.File.ascii) mode. If changed to the [binary](file.md#torch.File.binary) mode, the default endian encoding is the native computer one. The file might be open in read, write, or read-write mode, depending on the parameter `mode` (which can take the value `"r"`, `"w"` or `"rw"` respectively) given to the [torch.DiskFile(fileName, mode)](#torch.DiskFile). ### torch.DiskFile(fileName, [mode], [quiet]) ### _Constructor_ which opens `fileName` on disk, using the given `mode`. Valid `mode` are `"r"` (read), `"w"` (write) or `"rw"` (read-write). Default is read mode. If read-write mode, the file _will be created_ if it does not exists. If it exists, it will be positioned at the beginning of the file after opening. 
If (and only if) `quiet` is `true`, no error will be raised in case of problem opening the file: instead `nil` will be returned. The file is opened in [ASCII](file.md#torch.File.ascii) mode by default. ### bigEndianEncoding() ### In [binary](file.md#torch.File.binary) mode, force encoding in _big endian_. (_big end first_: decreasing numeric significance with increasing memory addresses) ### [boolean] isBigEndianCPU() ### Returns `true` if, and only if, the computer CPU operates in _big endian_. _Big end first_: decreasing numeric significance with increasing memory addresses. ### [boolean] isLittleEndianCPU() ### Returns `true` if, and only if, the computer CPU operates in _little endian_. _Little end first_: increasing numeric significance with increasing memory addresses. ### littleEndianEncoding() ### In [binary](file.md#torch.File.binary) mode, force encoding in _little endian_. (_little end first_: increasing numeric significance with increasing memory addresses) ### nativeEndianEncoding() ### In [binary](file.md#torch.File.binary) mode, force encoding in _native endian_. ### longSize([size]) ### Longs will be written and read from the file as `size` bytes long, which can be 0, 4 or 8. 0 means system default. ### noBuffer() ### Disables read and write buffering on the `DiskFile`. doc/file.md000066400000000000000000000334431316246254300131010ustar00rootroot00000000000000 # File # This is an _abstract_ class. It defines most methods implemented by its child classes, like [DiskFile](diskfile.md), [MemoryFile](memoryfile.md) and [PipeFile](pipefile.md). Methods defined here are intended for basic read/write functionalities. Read/write methods might write in [ASCII](#torch.File.ascii) mode or [binary](#torch.File.binary) mode. In [ASCII](#torch.File.ascii) mode, numbers are converted in human readable format (characters). Booleans are converted into `0` (false) or `1` (true). 
In [binary](#torch.File.binary) mode, numbers and boolean are directly encoded as represented in a register of the computer. While not being human readable and less portable, the binary mode is obviously faster. In [ASCII](#torch.File.ascii) mode, if the default option [autoSpacing()](#torch.File.autoSpacing) is chosen, a space will be generated after each written number or boolean. A carriage return will also be added after each call to a write method. With this option, the spaces are supposed to exist while reading. This option can be deactivated with [noAutoSpacing()](#torch.File.noAutoSpacing). A `Lua` error might or might not be generated in case of read/write error or problem in the file. This depends on the choice made between [quiet()](#torch.File.quiet) and [pedantic()](#torch.File.pedantic) options. It is possible to query if an error occurred in the last operation by calling [hasError()](#torch.File.hasError). ## Read methods ## They are three types of reading methods: - `[number] readTYPE()` - `[TYPEStorage] readTYPE(n)` - `[number] readTYPE(TYPEStorage)` where `TYPE` can be either `Byte`, `Char`, `Short`, `Int`, `Long`, `Float` or `Double`. A convenience method also exist for boolean types: `[boolean] readBool()`. It reads a value on the file with `readInt()` and returns `true` if and only if this value is `1`. It is not possible to read storages of booleans. All these methods depends on the encoding choice: [ASCII](#torch.File.ascii) or [binary](#torch.File.binary) mode. In [ASCII](#torch.File.ascii) mode, the option [autoSpacing()](#torch.File.autoSpacing) and [noAutoSpacing()](#torch.File.noAutoSpacing) have also an effect on these methods. If no parameter is given, one element is returned. This element is converted to a `Lua` number when reading. If `n` is given, `n` values of the specified type are read and returned in a new [Storage](storage.md) of that particular type. The storage size corresponds to the number of elements actually read. 
If a `Storage` is given, the method will attempt to read a number of elements equals to the size of the given storage, and fill up the storage with these elements. The number of elements actually read is returned. In case of read error, these methods will call the `Lua` error function using the default [pedantic](#torch.File.pedantic) option, or stay quiet with the [quiet](#torch.File.quiet) option. In the latter case, one can check if an error occurred with [hasError()](#torch.File.hasError). ## Write methods ## They are two types of writing methods: - `[number] writeTYPE(number)` - `[number] writeTYPE(TYPEStorage)` where `TYPE` can be either `Byte`, `Char`, `Short`, `Int`, `Long`, `Float` or `Double`. A convenience method also exist for boolean types: `writeBool(value)`. If `value` is `nil` or not `true` a it is equivalent to a `writeInt(0)` call, else to `writeInt(1)`. It is not possible to write storages of booleans. All these methods depends on the encoding choice: [ASCII](#torch.File.ascii) or [binary](#torch.File.ascii) mode. In [ASCII](#torch.File.ascii) mode, the option [autoSpacing()](#torch.File.autoSpacing) and [noAutoSpacing()](#torch.File.noAutoSpacing) have also an effect on these methods. If one `Lua` number is given, this number is converted according to the name of the method when writing (e.g. `writeInt(3.14)` will write `3`). If a `Storage` is given, the method will attempt to write all the elements contained in the storage. These methods return the number of elements actually written. In case of write error, these methods will call the `Lua` error function using the default [pedantic](#torch.File.pedantic) option, or stay quiet with the [quiet](#torch.File.quiet) option. In the latter case, one can check if an error occurred with [hasError()](#torch.File.hasError). ## Serialization methods ## These methods allow the user to save any serializable objects on disk and reload it later in its original state. 
In other words, it can perform a _deep_ copy of an object into a given `File`. Serializable objects are `Torch` objects having a `read()` and `write()` method. `Lua` objects such as `table`, `number` or `string` or _pure Lua_ functions are also serializable. If the object to save contains several other objects (let say it is a tree of objects), then objects appearing several times in this tree will be _saved only once_. This saves disk space, speeds up loading/saving and respects the dependencies between objects. Interestingly, if the `File` is a [MemoryFile](memoryfile.md), it allows the user to easily make a _clone_ of any serializable object: ```lua file = torch.MemoryFile() -- creates a file in memory file:writeObject(object) -- writes the object into file file:seek(1) -- comes back at the beginning of the file objectClone = file:readObject() -- gets a clone of object ``` ### readObject() ### Returns the next [serializable](#torch.File.serialization) object saved beforehand in the file with [writeObject()](#torch.File.writeObject). Note that objects which were [written](#torch.File.writeObject) with the same reference have still the same reference after loading. Example: ```lua -- creates an array which contains twice the same tensor array = {} x = torch.Tensor(1) table.insert(array, x) table.insert(array, x) -- array[1] and array[2] refer to the same address -- x[1] == array[1][1] == array[2][1] == 3.14 array[1][1] = 3.14 -- write the array on disk file = torch.DiskFile('foo.asc', 'w') file:writeObject(array) file:close() -- make sure the data is written -- reload the array file = torch.DiskFile('foo.asc', 'r') arrayNew = file:readObject() -- arrayNew[1] and arrayNew[2] refer to the same address! -- arrayNew[1][1] == arrayNew[2][1] == 3.14 -- so if we do now: arrayNew[1][1] = 2.72 -- arrayNew[1][1] == arrayNew[2][1] == 2.72 ! ``` ### writeObject(object) ### Writes `object` into the file. 
This object can be read later using [readObject()](#torch.File.readObject). Serializable objects are `Torch` objects having a `read()` and `write()` method. `Lua` objects such as `table`, `number` or `string` or pure Lua functions are also serializable. If the object has been already written in the file, only a _reference_ to this already saved object will be written: this saves space an speed-up writing; it also allows to keep the dependencies between objects intact. In returns, if one writes an object, modifies its member, and writes the object again in the same file, the modifications will not be recorded in the file, as only a reference to the original will be written. See [readObject()](#torch.File.readObject) for an example. ### [string] readString(format) ### If `format` starts with `"*l"` then returns the next line in the `File`. The end-of-line character is skipped. If `format` starts with `"*a"` then returns all the remaining contents of the `File`. If no data is available, then an error is raised, except if `File` is in [quiet()](#torch.File.quiet) mode where it then returns an empty string `''` and after that you'll be able to see that last reading failed due to end of file with your_file:[hasError()](#torch.File.hasError). Because Torch is more precise on number typing, the `Lua` format `"*n"` is not supported: instead use one of the [number read methods](#torch.File.read). ### [number] writeString(str) ### Writes the string `str` in the `File`. If the string cannot be written completely an error is raised, except if `File` is in [quiet()](#torch.File.quiet) mode where it returns the number of character actually written. ## General Access and Control Methods ## ### ascii() [default] ### The data read or written will be in `ASCII` mode: all numbers are converted to characters (human readable format) and boolean are converted to `0` (false) or `1` (true). 
The input-output format in this mode depends on the options [autoSpacing()](#torch.File.autoSpacing) and [noAutoSpacing()](#torch.File.noAutoSpacing). ### autoSpacing() [default] ### In [ASCII](#torch.File.ascii) mode, write additional spaces around the elements written on disk: if writing a [Storage](storage.md), a space will be generated between each _element_ and a _return line_ after the last element. If only writing one element, a _return line_ will be generated after this element. Those spaces are supposed to exist while reading in this mode. This is the default behavior. You can de-activate this option with the [noAutoSpacing()](#torch.File.noAutoSpacing) method. ### binary() ### The data read or written will be in binary mode: the representation in the `File` is the same that the one in the computer memory/register (not human readable). This mode is faster than [ASCII](#torch.File.ascii) but less portable. ### clearError() ### Clear the error.flag returned by [hasError()](#torch.File.hasError). ### close() ### Close the file. Any subsequent operation will generate a `Lua` error. ### noAutoSpacing() ### In [ASCII](#torch.File.ascii) mode, do not put extra spaces between element written on disk. This is the contrary of the option [autoSpacing()](#torch.File.autoSpacing). ### synchronize() ### If the child class bufferize the data while writing, ensure that the data is actually written. ### pedantic() [default] ### If this mode is chosen (which is the default), a `Lua` error will be generated in case of error (which will cause the program to stop). It is possible to use [quiet()](#torch.File.quiet) to avoid `Lua` error generation and set a flag instead. ### [number] position() ### Returns the current position (in bytes) in the file. The first position is `1` (following Lua standard indexing). ### quiet() ### If this mode is chosen instead of [pedantic()](#torch.File.pedantic), no `Lua` error will be generated in case of read/write error. 
Instead, a flag will be raised, readable through [hasError()](#torch.File.hasError). This flag can be cleared with [clearError()](#torch.File.clearError) Checking if a file is quiet can be performed using [isQuiet()](#torch.File.isQuiet). ### seek(position) ### Jump into the file at the given `position` (in byte). Might generate/raise an error in case of problem. The first position is `1` (following Lua standard indexing). ### seekEnd() ### Jump at the end of the file. Might generate/raise an error in case of problem. ## File state query ## These methods allow the user to query the state of the given `File`. ### [boolean] hasError() ### Returns if an error occurred since the last [clearError()](#torch.File.clearError) call, or since the opening of the file if `clearError()` has never been called. ### [boolean] isQuiet() ### Returns a boolean which tells if the file is in [quiet](#torch.File.quiet) mode or not. ### [boolean] isReadable() ### Tells if one can read the file or not. ### [boolean] isWritable() ### Tells if one can write in the file or not. ### [boolean] isAutoSpacing() ### Return `true` if [autoSpacing](#torch.File.autoSpacing) has been chosen. ### referenced(ref) ### Sets the referenced property of the File to `ref`. `ref` has to be `true` or `false`. By default `ref` is true, which means that a File object keeps track of objects written (using [writeObject](#torch.File.writeObject) method) or read (using [readObject](#torch.File.readObject) method). Objects with the same address will be written or read only once, meaning that this approach preserves shared memory structured. Keeping track of references has a cost: every object which is serialized in the file is kept alive (even if one discards the object after writing/reading) as File needs to track their pointer. This is not always a desirable behavior, especially when dealing with large data structures. 
Another typical example when does not want reference tracking is when one needs to push the same tensor repeatedly into a file but every time changing its contents: calling `referenced(false)` ensures desired behaviour. ### isReferenced() ### Returns the state set by [referenced](#torch.File.referenced). doc/gather.png000066400000000000000000001572341316246254300136250ustar00rootroot00000000000000‰PNG  IHDRM’þzõ? ¦iCCPICC ProfileH‰•–PSÙÇϽéBoÒ;H¯¡ÒATB ”Rhveqׂˆ* º¢àZYTDÛ"`ÁÊ‚ˆŠ².l¨ìá½™·óæ}3'ó›ÿ|÷ÿçÜœ™òm&ŸŸËÁ Âü<é1±qtÜ0 K1YB¾GhhøÇúpéFê–éŒ×?÷ý×’es„, P„ÙBV§ÕÎâ D øˆ®“#âÏp)Âò$ Âu3œ<Çí3œ8ǽ³=a^?Of2É&žÍJF|ÈÈnÍå!쎰++…ÉFx=‹222gø(†‰ÿæ“üž‰O&3YÂs{™-¼7WÈOgæýŸÇñ¿+#]<ÿmd‘Sþa3{Fά.-3P¼Ä%!óÌeÏöÏrŠØ?ržYB¯¸yf3½çYœé1ÏLÁ³\#bž™aŽÐ'\âÏaI2¤/‘p×—1Ïù)ÑóœÍZ2Ï´ðÀ…/‰.‡I2' |%{Ì.dc12ˆR"ü²ÅH2°9Þ>)éç‹<%žüôPI?'ÝO¢ ³Ã%ÏŠ?Ø<§2B|B%çÂAàL "À'W4Ø+“Ÿ'à&§ˆèÈáÐ<–Ù"º•…¥-3÷oîó¾£ÍÞ+ˆvmA˽€K">]ТhDÞ«<± éÛ 1¨´Ä²Ä‚ì9 =óƒD $T@S`ì€3p> „€ V ©S@€°l… ì»A¨A8N€Ð.€Ëà:èwÀC0FÁ+0>€)‚p¢BÊ&¤™@Vä ù@AP %@ÉC«¡MP1TU@ÕP=ô+tº]…ú ûÐ04½…¾À(˜ ËÃê°>l;Àp /‡“á,8.€·Áåp |n†/À×á;ðü žD ECi¡LQ(/T*•„ Ö¢ŠPe¨T#ª Õº…B£>£±h*šŽ6E;£ýÑ‘h: ½½]®C7£/¡o¡‡Ñèï F c‚qÂ001˜dL¦S†©ÅœÆtaî`F1°X, k€µÇúcc±©ØUØ­ØýØ&l¶;‚ÄápÊ8œ .Çĉp…¸½¸£¸ó¸~Ü(îž„×Ä[á}ñqx~#¾ ߎŸ"ÈôN„›GØN8Dh#Ü$Œ¦ˆ²D¢ 1‚˜JÜ@,'6»ˆˆïH$’6É‘´”Ä%­'•“Ž“®†IŸÉrdc²9ž,&o#&wï“ßQ(}Š;%Ž"¢l£ÔS.R)Ÿ¤¨RfR )¶Ô:©J©f©~©×Òi=iéÒùÒeÒ'¥oJËdôe¼d˜2ke*eÎÈ ÈLÊRe-eCd3d·Ê‘½*ûB'§/ç#Ç–+;(wQn„Š¢êP½¨,ê&ê!juT+o ÏO•/–?&ß#?¡ §`£¥«P©pVaˆ†¢éÓ´tÚvÚ Ú]ÚEuEEŽâÅFÅ~ÅJªJîJ¥"¥&¥;J_”éÊ>ÊiÊ;•[”« UŒU–ªä¨PéRW•WuVe©©žP} ««…©­R;¨vCmR]CÝO¯¾Wý¢ú¸MÃ]#U£TãœÆ˜&UÓU“«Yªy^ó%]îAO§—Ó/Ñ'´Ô´üµÄZÕZ=ZSÚÚ‘Úµ›´ëut’tJu:u&t5uƒuWë6è>Ð#è9è¥èíÑëÖû¨o ­¿Y¿Eÿ…’à ߠÁà‘!ÅÐÍ0˰Æð¶ÖÈÁ(Íh¿Q¯1llkœb\i|Ó6±3ášì7é[„Y丈·¨fÑ€)ÙÔÃ4Û´ÁtØŒfd¶Ñ¬Åìµ¹®yœùNónóï¶é‡,ZÊYXn´l³|kelŲª´ºmM±öµ^gÝjýÆÆÄ†csÀæž-Õ6Øv³m§í7;{;]£Ý˜½®}‚ý>ûy‡P‡­W1ŽžŽëÛ?;Ù9‰œN8ýålêœæ|ÄùÅbƒÅœÅ‡¸h»0]ª]†\é® ®?»¹i¹1ÝjÜž¸ë¸³ÝkÝŸ{y¤zõxíiá)ð<íùÑËÉkW‡7ÊÛϻȻÇGÎ'Ò§ÂgÐWÛ7Ù·ÁwÂÏÖo•_‡?Æ?ЧÿCÁbÔ3&ìÖ\ $†V> 2µÃÁÁ»‚-Ñ[Â[ÒB!»B‡„f…þ¶»4tiåÒga–a«ÃºÃ©á+ĈðŒØñ0Ò0RÙ%Uõ1Ú;º$z(Æ;u¨û‡_êkUj‹k¿æª «»To__DíÈö¸AÜ0v4þhï1ïc­¦ÕM´¦âãà¸øøË_~½{"ðDçI‡“§ôNí;M=]Ô 5ç5O´¤´ µÆ¶ö 
8ÓÙæÜvú7³ß·kµWžU8»ýñ\Á¹éóùç';øã’/Œt®ì|x1æâíKK/õtv]¹ì{ùb·G÷ù+.WÚ¯:]=sÍáZËu»ëÍ7loœþÝö÷Ó=v=Í7ío¶ö:ö¶õ-î;×ïÖá–÷­Ë··¯ßYr§ïnäÝ{ñC÷Ø÷^ÜO¿ÿæAöƒ©‡ëa=–y\6¨6Xó‡ÑMCvCg‡½‡o< òp„5òê©ðé×Ñ‚g”geÏ5Ÿ×¿°zÑ>æ;ÖûrÙËÑWüWSã…Êþ¹ïµáëS¹ÿuc"fbôàÍôÛ­ï”ß~oó¾s2trðCƇ©EŸ”?Õ}vøÜý%úË󩜯¸¯åߌ¾µ}üþh:czšÏ0gG²à¤$Þ€ ™›‰RsóñlAs3ý,â¹z¶ì¨u ²ÿõTÎÌ Ë!kf<Šp°µµdý«„IÖVs^ddÊÄ|šž~§® €o‚éé©ýÓÓß!aïБ57—ÏT)¢ ƒc­¬ºçÃ,Ôß‹ì둚·iTXtXML:com.adobe.xmp 589 402 œW@IDATxì]˜UÖ==y˜LÎAAÀìªQ\sÜUw×ìúïšÖ]Y1g1gQ׌Å€˜Lˆ(9Çf˜œç¿§†êéîéž©ê®î®aîïMU½z©NUWwß}÷yD ¢(Š€" (Š€"Ð" -žÕ“Š€" (Š€" (JšôAPE@PEÀJš,€¤IE@PE@PҤπ" (Š€" (PÒd$M¢(Š€" (Š€’&}E@PE@°€€’& iE@PE@P”4é3 (Š€" (Š€”4YI“(Š€" (Š€" ¤IŸE@PE@P,  ¤ÉHšDPE@P%Mú (Š€" (Š€"`%M@Ò$Š€" (Š€" (iÒg@PE@P (i²’&QE@PE@I“>Š€" (Š€" X@ ÉBMÒÆ¨ªªBMM_¨­­õ;æùºº:0mà¹Àãúúz# Ó744ùxŽy˜Ö7Ÿ¹Ï-Ó…’„„#_°ó‰‰‰`HJJBrr²w›’’âç9¦IMM5Òð÷ÓÓÓ‘™™‰œœäåå¡C‡ÈÈÈ0âYWuu5<‘'Xݧ(Š€"ÐþðÈ+ô«ýááøÿþûï6ln¹åãcM¸ù1&Y ø’ÆûÞßsfÃWYYé Æ~àÖLÃzHH*Z $$¾$ÄLo’ó˜ÄƒÂ´l÷Ù~ß-ó™qŒX¦‰…y¾Û`˜çY¶/É3ÉË3ãMÂFÒFŒH†LYTT„mÛ¶¡¤¤¼OYYYFÑ}ûöÅÔ©S1bÄüüóÏfuºUE@h稦)ÊÀ®»îê%#:u2H‰‘IŽ|‰RKÁ·™$iii†V„î›Á<6·LkÊSO=…óÏ?ß )¬·­É iðŸF§Ê~7 9Û·‘Œ5Ÿzê©8î¸ãpÚi§aÕªUv–rÒN†ÓüöY´Hœ ˆkkQª„-N Ïmþ¨O’{ˆ SRVSSÖ>䎖8zHˆPØN;¶W‰bĽ—gÉ:Ëx³e9”òÕ«mµÂ\€ØV&M¬(Š€"ÐnPÒƒ[Í)ìÅÅvýU7o˜,c‡ÿ\Y¥ù9«1\”7Rcç®G£^ó¯±Z”ikŸvºÂ?>Ü#º°;Œ˜"C¡½d&ÉS¿óÎÃÇýúcÝ«¯†¥u ·ÝšOPE`ÇE@IS îmvv¶áɉªöÜ×DaKFF†#.³‡û73纟 ë»õô÷(R̺qÆåoÁìÓÏ0|8…Ûͧ(Š€" ˜´ ÒTôŸ›P»Ræ±·Q1&:ÕüHV'%%ÙÖàk·‡ŽÊ·K(Ñ6¿ôþ‹ÔlkÚg¶`‘hçèÃiÕÓO#¥s'dQEgAÔ¦ÉHšDPvŒ€#¤ië¹ÿDÉÃOE Æò—¦¢f®ó"£Öà€‚IœœA'3ãÃÏ9aîÛ€Mo’³ŠµÀ"q;°Qâ"‘p0Ûöë¯Øøî»X9y2¾?õ2KðPñnUÔ¦É*RšNPö‰€#.Röß éãä«E©Y¸éÇÅ¢W4\ry'„„‰þ‘ÂÎ#9pR’ó€âßdØðß@Ñ¢q:èufd54Ñ;·Y;e hž$Æ÷#Ÿyúõ³“]Ó*Š€" (-"àiÊüÛŸZ¬$Ò“©‡„†êêH‹‰[~’¦Í›7;R¿¬'+Óâ)ʱBºMß‹«|#„¸D^4]#Ξšø#< ?ó \eTÔûdÜq§ù Ó’"2½¸2¶æ’ÅÂõü„C—*Š€" (Š@(ÚÄW¢¾°•ÏDεW†ºWÇwéÒK–,q¤NhšÂ0ŠJF jšèI³kÙõæfQÍ"±‘›>¥è³)Žo¨X´KëPE@pŽ’¦²g§ íȱHìÁ¿sR_\‚êïr®À€’ª~˜ƒòç_Eæ%CòÐ]ÎF~˜••„4e‹Öå )üÓíô—íß¶ïûo<iâ,°…æ!9i±lçKX'„ª‰AQ«Ó8xµ¯lGJè,a „&!iе§©v{{æ]¬H—n‰Ôñ§½+ÔÔŠ€" (m GISõì_"Z!§HSgyÑþ†ÖÏ"´mJÈk®éà9¦-¾õ^c†]ÊÈFÏ‹õåå¨þñ¤´“•ò7ßCÑ?¯AÇÿ=ÂÄJ騱¤„¦ÈBMNsmN`* “Dñ¯£^hRµOòDÙOö9n¾KÂT¿ÏægÝCÒTà i2óN‡çÜõ hkE@pŽ’&ca4Yô4PJÿ7©ûŽFò®;ƒZ#Hš„Œ–‡\j×®GÉ}¡æ·¨]´Î> 
ï}ŒŒ?ŸX<êKËüYðˆMJÙäÐsÌ){ò”Üóº|ð’wkîà°âí÷ ÂÔå³·¼óNÍÊu*‚Óçr9àß&ÚÙ›ªF—á,J[ÿ¶ÌBû_cížÞ2ˆ8QöÉ_ …°–Ê–Ü­A§«UI0…Í#39£(Ã<»É1­¹['ÖÉk¥ ¿ÓJšüàÐE@Pœ%M,<ˆñnÝŠ•¨ÂD"TxÉÕ¨ùù7tzíi!RcšÓxXòè3¨úükdýó$_56î¶?R÷Úe¯LC‡3N„'€˜•>ùÒ>ÜÐr•¿<Õ[fæ¥ç¢ô‘§±å¨S‘7ù~CãäIo$U2ÜWxé¿Ñ}á·HÈÌðæ‰Æšémš‹öÆlZ{u¹XŒog/‰r›ÓWÖ5í„ì^gƒLþ«âdJÝ£AoµyºÅ­Gš’*kÕæ[-¦¤Ñ{“ѹ7©(Ë–6%ÈíLïë=ckgµ,èÛ·oSfµi²Ÿ&VE Ý!à$Š-‡ˆŽÏ>ŒÔ½Gùå(ïßU_~‹¼ÇîF¢,Q.D)ó² ‘~Ü8T}7U3g!mìÞ< 2ÔT·nè+jÛ w!ëò‹¼ço}’”˨,^¼ØÏpiê °Âx¤%éõHßGêN’Ú;4 ¹[{Õ#¥KzŽîŽ$^4]õ¥ XñÍjlذ?,ýß-þÆ Ã=÷ܳ½QÂ+exNmš¼pèŽ" (Š@“&{ Ð4U‹VÉ#Cq%w?Œlñ³dzϽûF]ö_tûöo“jW¬‡Ì ÂÔ©£|øËQ>í=äÞ}ƒ‘¦Ã'I9!õÀ}äCØh»Sóó\C+UñÚÛ¢…: ©ûíå-;5‹—!M´PÙÿ½õ[ Pö«È?᤼?{v÷K̓uëÖaâĉÆ0]ø 0}%ùÚ‘$”— )ª˜ó =$¼E †yM¼RR$1­|çM“/oó}ÌÀãÖ¯~¾Ý¨sÍš5زe‹Ÿµ`Ô€1ÐPÜwëmŸhšL1Hü Ï‘0UÉ_MÀ_½¤õ#M’Ös§\›‡ãyMBL¼õI4 Óã?_Cp#MEcÝ$i RÆçBÂHääψ3jkÀFùc½žêkëÑ«¾—x‡ÿP£Çàüiç£wogôòÞ¨(Š€" (Á0¿ÈÁÎÙŽótHGC9Zš$!'Õß|4Y%óÂs¼'ÒÇâ[îAñ ýGFØåS¦!Cl—…0ÕmØ„B!U<—Ô·ñã–"ª¤]Ú§Œ³èZ>–s‘4°?ª…¥ÿñHoù •¨xÿ±Yš€.M5ìx»v†GXë`¾ÂK®Bò]*†â©‡€†âR$äÊjËOôJÅ*¡³®µ2z$|¼«™‡êo”!Â{‘@‚"Ä«ê«ïŒáÀ’{ËÊ{¸ùRÞÊÂܹPœ+MŸ>cÇŽõ³¡ V\-µ-ÁN8GMR4fˆ•ŠÓË+ñOóRÞâ & ÙÑýÐÓ‡¶CNI]}¦/+AFrëŸi«Ø:ú¿rHæa®A˜ÞÄtì+K­ÃðšìÔª&&Åpx4¸?­˜T¯•(Š€" ¸1+n’—ƒÔý÷ŽÛÅr¨É)²2suŽºo-.6¶_ʱU!y ÇOS¨òŸÆ“˜†÷0N4M¯ãüQ(TgÃy¨Öã9”èkn=gx)333QZê?¼^IšKPE`GDÀQM“›Ê8ód77ÏrÛ>^QŠ?¾±ŸœÞŸ®,Å}:`ß^-; õ-<\?M¾eøîOÂýƬ9ÆÍ”ilgàϾ§#Ú·­ÅÈG:²ÅΜë÷Y’¦hØ{Yo¦TE@p3íFÓï›@ C$²¾´Æ LïžÜe5õ¸ý»|¼~|$'6Ÿ ªú‹rJãÅ:èfÀ”ñ8'‹®é<‡2ù‹TÂõÐý6îÃmÒ»B­V,5[vÛ§éE@Pâ‹@d_ñø¶½MÕîÄù%%8<윗‚{~ÈǧgÈÌÄÔ&Òb'‡ç|ë<gÊòÃ_c)Ë2Á½ÅÊéßÓ¶÷i˜]UµÝ«¹ÜCq ÛÈ$I¶;³W»¦VE@p;ífxÎí7ÂJû.Ùµb~Ö»kñQ°Ÿa9³|'È›YV°í~b Îpþ‚Šgñ’hŸ}jKßR5MáØ­ÁôÁ®-ô±qR ´TE@h³¨¦)F·Î)ìçç¡_N2ŽÞl?j™"& Y,ú² +Óƒ°3îŃø³B%o5ž³Ù¸ôŒ]Ù€eè†ìf3p‰6¶£E@P\‡€’¦Ýj1LïßáV¹µ¢¯,܆ŽînF¾h ÏÝŽ[P,”¯ñ•¸º¼JW¸¬‰]ŸR¥(ÂÌ—áAÿ5 ÃmƒæSE@PLtxÎD"Ê[j0"ÕbLYP„#Ä'SÌVÖ‰òµ+þ¡I·ât•¿7ÅùÀ§øs„¼ìŒÁÁ’[ŠãPɦù¡†#UæÏÙ•h]Úm¦WE@pªiŠÑýpâƒüМ­8kX.–Vƒ¾™^_´ /ÈpÝ}?æ[¾ :oÌÊ ch¯•,û‹ñõ·B“¦âU!-ýeû†ˆMS$B’iW+6b¾‘eƒíë9Al#¹^Í«(Š€"ànTÓ£ûlÚµûuÁ/.Ǩîéè“•ŒnIHÚ;~`+ŒÆ§ ’p† /”egDiTÿ±V»½@qåÉ‘}Qêxò€½òFbfÆAâmX%ygKZ®AL±0é/œ!ÍŽè!æ4ÖaómmmÄÚ@›UjrE@P6„€’¦ݬp@`Óþ¼[.ZOj<ÝÂÓ¹/z FB¿~YìjoÌÌ ‡È´| áˆg/É{J89›çIÌ:ìñ°ªM@Ùr!eÕ&#kžÇJÌ–-[°Ë.»XIªiE@PÚ!JšbtÓIT¬Ø4%ôŠÄ=ªÄ9d½ÏqIbÇ”B•Ž 
Y%ˉrœ˜O†¨x2;Á“ÝEöseÛ ý–¤­L«5\– —¿ÿçBš|EøRM‘eË„@-*VKX× q’ÈäR’%Elè»È»½œÑ£GƒAEPE@†€’¦`¨D!îÝwßÅm·Ý†Þ½{·XzÂÈcÀ-áT[Y”Ö–˜ð dá¹Ây%892ÏêVPE@°ƒ€Gì[Ì~¸|šÖ"\ËŒSç;ì0ÐïP÷îݽ9}í‹|÷™ ðØŒ«®®‰H,è5›ÇœžÏ`ž vk™þüùØ}÷ÝqÁàñÇGR’ØF%&š°–É¥æ‡å›³Ú¸eŒg- ËgÙ\Æ…kÊùn‰×~£pn‰Õ§Ÿ~о}ûâè£6Œ×iÀžm:¾TQE@Pb‰@Ë_ºX¶d­‹÷'žx#GŽ4®$‡$ÃwËfœ C0ÒÄ4$ $) $"¾ûæ1II0ÒÄòY6ËšÏ3çÊËË be.nIª¸%á"ùâ±IÔ¸5Óš¤Œi:uꄼ¼<ƒ¨… €fTE@h¨¦©]Üæ¶y‘wÞy'ú÷ïÓN;­Ù’8Ñc¸xl.nyL¢FRF¢ÆÀeY˜žÛ‚‚¼úê«FÙÓ¦MÃñÇ߬PE@PLTÓd"¡[×!À¡Ì 6m5WÔ1D"ú£Öïˆ#Žˆ¤Í«(Š€"ÐPMS;¸Émõi{EÑ^{‰¿E@PE Î(iŠó ÐêE@PE m  Ë¨´ûÔfZù—¾Ç–ÂÊ6Ó^m¨" (Š€"`%MV‘raºÊ*ç—.i_^v 6m­rIk´Š€" (Š€s(ir˘–ôÝÜ|\t;ss—äÊšxÉI±u*YZ^‹ÛžYà. ´5Š€" (;JšÚè-íÓ­HPÜ&™éI(ØVÓf•Vã£ï7Æ´N­LPE ý! ¤©ÞsºqOHˆ­FÇ Tô/__['óI‰ Èê Þ3¬ÜM£(Š€">JšÂÇ.î9ݸN—¼Ô˜kšºwJû÷ý!î÷C (Š€"°c# Ýó6zëêèBMÓ9ã´QDµÙŠ€" (Š@˨Ÿ¦–ñqõÙòÊZtHSÞëꛤSE@ØaPÒ´ÃÜJ½E@PE@ˆ&jÓMtµì˜!ðé›°`ù¶˜Õ§)Š€" ´?”4µ¿{Õ+~답˜¿,öäeö­ˆñ¤½¨â¨…+Š€" ¸%Mî»'mºEw=¿I1vnIÀ–¯+ÅÒ5%m;m¼" (Š€»P›&wߟ6׺æ ¢²êÚæÚ® VE@PZB@ISKè¸ø\…¬;WTRÓ]ÜJmš" (Š€"°ã  Ãsmô^nÚZ‰³®û®¶^›­(Š€" ´=”4µ½{f´8%)Ãæ´ÑÖk³E@P¶‡€’¦¶wÏŒçd&ã‡ù[Q[WßF¯@›­(Š€" ´-”4µ­ûåmmFzÆ íˆ²ŠZoœî(Š€" (Š@ôPCðèaõ’«kêQ/ΉÒR£^—V (Š€" ´w”4µ÷'@¯_PE@P,! 
Ãs–`ÒDVØVZªê:«É5" (Š€"ÐfPÒÔfnUÛhè¿úMí¬ÚÆ­ÒV*Š€" ØD@I“MÀ4yËÐ0}áJ]Τe”ô¬" (Š@[D@IS[¼kÛÛ\'Fà›ÅÉ¥›dŸÝ;!%kϹ m‹" (ŠÀމ€’¦6|_·•ÖàŽçºî *â`ÓôÒ«ðÉ]‡…6HPE`ÇA@IS¾—t7°­¤ÆUWÐ9'Å¥±÷Õ«K:fýšï*,´1Š€" (;I;Öå´¯«IHð [<ƒ»Iþ°gÊB±–ƒGuƒŠ" (Š€"-”4E Ù”['K¨”»Ì#xÎé`PQE@Pv4tx® ßÑ.yi((® ÂUE@PE º¨GðèâõÒ·V‚䩽˴Ï×kñõîÖ¡½C¡×¯(Š€"%”4E X-6¶ËLBŠÛl¼b‹‚Ö¦(Š€"M”4E]-[PE@PvÔ¦i‡¹•z!N PSã.N\“–¡(Š€"à JšœÁQKÙ˜5kŽ>úèàJôE@P¢€ºˆª!Ê|çwPYY‰>}ú !!DçÎC¤ÖèX#°ß~û!;;_|ñ>øàXW¯õ)Š€" ¸µiŠá â‡xÑ¢Eèß¿?Fމ¥K—¢¨¨½zõB×®]Ñ£GtïÞYYYÈËËCzz:RRRœœŒÔÔT#tèÐ Æ6†Íw¼ªÚÚZp8Ìw[\\ŒÒÒR”••¡¢¢Â¯ÎÄÄD“ÂÂB?案2‰)q-))ÁæÍ›Q^^nœ3 #v999`y$I×]wy “'O6ê¾ì²Ë¼qº£(Š€" %M1~Hš&L˜€7ß|Ó¨¹ºº›6mÂúõë°uëV0˜äçI*øñ'™0·$ Ö|4y<ƒ$$%%[î§¥¥¡¾¾¾ $ ,ÛLO‚ÁÀc’¸ººº yØ^ž3ÛÌ-˯ªª2IÓ°lžã5tëÖÍÛ†ÌÌLcëÛ6æaz–Åx’KÖÉkdÛ˜—Á<Çt¾8‘”‘TñzˆõqÇç½Ù+¯¼o¿ý¶7NwE@P" Ãs1~vÝuWlÛ¶ ùùùÆÐÉ ‡ë¢-$)p€¡á”Il| …Ù“™ûÜ’°ËC¢Å@be’-nM‚FBcÆOœ8Ñ €÷ÜsYmÜ·ƒ 2î ïµQ*Š€" (Š€‰€’&‰niËDF¬í™HTî¸ãcªcÇŽ1¼âàUQëäFÃëáÇcþüù “Š" (Š€"`" ³çL$b¸0`V¯^Ûªâ”[4(¡4VM­ÏIí’%KâS¹Öª(Š€"àZ”4ÅáÖÐØ{ݺuq¨¹ÑXš'7‡Ü&ƒƲeËÜÖ,m" (Š@œPÒ‡ÀÙrœÕ!I f¶°ni‹ïõ÷îÝkÖ¬ñÒ}E@PEJšâðн@¼4Mœ-8•?UºuxŽšÀ 6Ä ­WPEÀ¥(iŠÃ¡?¦7Æ¡f¾Ÿ8mß Â™vnž£ „-[¶¸R æ†û¦mPE ½" ¤)wžš&ú ¢¦%ÖB‡™ôóäq«¦‰ÆòôíD·*Š€" (Š€‰€’&‰n©]á”ÿxh›hCäFíN á·TlÒ ¦Š" (Š€"`" ¤ÉD"ÆÛž={Àc\­áh’þ‘Ü nž#6ô(®¤É O‰¶AP÷ àŽ¹çîÁ#f-!i nl¼JÚ`Ÿ%Ë~_ )޵‹d€Cƒn–†çªL;l¡õ©ÝbßZâĵëTE@P%M&1ÞÒœëÍ5—sЀ™Þh1Áq9&¡“„úo½dKÏÞé¬ ‡¢aNÃéµk× [kIèTõ²Nï=›ì½’s<¨•‘²†º¦uö¶` *PŒ^ž]–)  ,ˆ†hlz’×T~wYZ.Ùç¸éLè=Ú4¹Åö+t+õŒ" (Š@,PÒK´}ê"iZ¸p¡OLðÝ<ü¼=4¦ùõW`ä ZFÙè§ÒƒL9a’)±êÆ„˜"ÛQÛ÷7tlÉ5çœ.,üÖ[oá©§žÂW_}…‡~ÿûß-o®U×Zâšmdi+6`6¦c~ÃF,ª˜y _ྒ9èTÒ ¥‹Í²´&rÅ£±¿%Ø&M:tPMÁSQE@ð" 6M^(b»Ã¥L ªô¾û³™Ž½P*4a‰„%Ìß¾Ïe@‚» !x¤ÄiöìÙøÇ?þcŽ9 ,ÀôéÓ HâdU8; iNãvE@PEÀD@I“‰DŒ·ü(ÛúÃõÀ÷?ˆ™³Ø9‹YRØ’’’b Ï…[@ aâPßÍ7ߌË.»¬™j©;6Må¼"ƒuGà<™;Hxç…×äÆÅ„¿R-QPEÀ*Jš¬"åp:cÓØ˜¾€²²²,•nΖcb'ù®‹ ‘¥¬~‰Zòä—ÐâÁƒ>ˆ~ýúáÈ#´˜#²dóñ•8ÈÇAøSdinE@PEÀJšl€åtRj›8eß*iòµÛ–Éw’Ó(ÛBÂÆ¡1'äûï¿Ç'Ÿ|‚©S§:Qœ¥2~‘Y‚ëñ»¥´šHPE@p %MN!F9$Máz& w5¦Hlš|/õÞ{ïÅW\—Ûδ ‡¼. °VþŽÝp †âØEœ¤ Ín4½" (Š€"`%M–¡r>¡©i §d’&Ï…#´irÂ^çÙgŸýMzè¡á4ÃðLŽÛƒD$Élºg„8­Ãª›…©xÿEo Áx©6<¤‡Õpͤ(Š€"Ð.PÒÇÛž››¶¦‰¶Lá’&rWTÈZ%—Lyî¹çðÎ;ï„]J°Yxv ë„^¢g:Ý5¨2œ\^%®/Â\;Å„Lhð2¡žPE@h8cØÒ. 
rþ"ÓÒÒPZ*‹ªÙ”Í›!>ž€ûîfͲ™Y’s(­&ܱ½íÕц‰N.­Úck%Ûà1‘Õçð9žmÓ®Áª²GM¾>¨l E@Pv8TÓÇ[JÒdgñÜk¯¾û®Ñ–IÌ¡0d(°ûîö/€Óé­._ªô3fàÿû_³Ó\OoÑ¢EâOJJµ"Ô4q¨0RY/KÈLÁ ÆðÜŲ܊BMïŠ" (Š€"`" ¤ÉD"[j|¬’&.Ò{ÇÀÒ¥@ÿþ‘5ÖªSÉPµTUUÎ,¹è°)$<ð>ÿüs\|ñÅft‹[®®6g ÖÈ ºÅ²€ï{X…y8W`1 æ³ÅF„8Éëáð©Š" (Š€"`" ¤ÉD"[j2¬Úí!kÍ ÇpD"]°—DçàƒƸqãpì±Ç¢°°3gÎÄ!‡‚iÓ¦KXjšÂ1gÙµbÃô#¦Ë*‡à,ÜæøÌ9’YÕ4Y¹‹šFPöƒ€’¦8Þk~”›/¥r <¢EæHõ¸”Ç& Ñ N‰J¤³ç¦L™‚wß}×Xj„§³Ï>Ûppi§}¡ ÁdÄnçyP]ØTãRd­Þ”Î$ '^9o¾åË2Ѿ•-ªòÁ‡µ’̆ãF¡©Åº§(Š€"°£! ¤)Žw”¤iÓ&"_9`ûÁ¾‘²_)djµl×J(@2µQŠíû5²õ5*çÐRO Í…vDåååÍOØŒ¡–) i.3‡Þeeÿ4õ•ÔL ôÈždÍáºß¥p’ÛUE@P%M&qØr‹ÚkB£äÁÛƒµ¡ReffZ U†ñN‚›mIˆœ’J'ŒÔh‹–¡(Š€"àÔå@ï=b['MÎ5”Óü©I‰·pŸ[‰ mštx.ÞOˆÖ¯(Š€»PÒÇûAÛ"Î ‹µpöÝÄ[HLÂ5vÛÕ<ÚkùŠ€" ´=tx.Ž÷Œš¦x&jw"õÓäl¼v§œ[ší¡; 5iÔâqKÛ)ÖÇ- #ÏñØ7Î7}Mq&àW_}Î64 ç™7??ßÀïÓO?ÅSO=…Áƒ9lª¢(Š€"°£# ¤)Žw8^š¦–\DÐK9=bs1anyLBa:Å ¶%© 1ɉI\|I‹/Ü«V­Â5×\ãå%7lë1‰Y>ÉžoîsK$Öͺh`O·´#1%1c ÞÜ’42žÇfÓsŸ[¶8±}úxgbýþûï˜8q"^{íµ˜¶‚Ó°9¦ûTFÃm·qhέ„ÎÂ6µ[õílT~ö%rþ{E›j·6VP¢@CY9´I5!h_ôAÒ`|˜ùŽµÐˆ:õú^'5M¦q¹o¼ö‰M{š+í-!9_9v;*¦½‡²ÇŸu¬<-HPÚ2c®AÌB‚I½|<Öz–?–qJšb‰v@]´é¡íL¬…Ã{´iЧ°~Õ4Åó„®».¿EW^‡Äî]C'²y&±gw›94¹" ìH$UsÔæÒPZO‡ôæ'\£Ãsq¼)$MñÐø4ÑÿP<…¤‰¶ZnÚ4Ej7æÆë²Ú¦ÄÎÐsÍoV“[J—2z$í¼“¥´šHPvL<ÙY`§Œï_ií¾GüëµQMSïífâAšÜ`N Ý.¸QHèÔÜÙ;S_¸ µK–£¾¤4ì‚ëƒhGÄýFCEìµµa_„fTÚ1‰];¡¾¨¸ ÒQ•—n³x7F(iŠã]¡6#¤É×Ãu¼.Ÿš.·Ú ‘еgMS4ž‰ºuЩ#ª˜Ójñ5¿/Eå§_6K·qðÞð%N â¶¢àÏ£øÎ›¥ÕE@hD€¿“@©Û°åoÍŒnñ˜“9 /ýwH·-fÞ~Ò“–ކ '̪óñ}l¥¨¸¥QÒ7è+¦Ö'Z^ÁC]‡Ãqªª¼pâIšÜªÍ!ir³©pðŽWžªïB•¥Ú•«‘qîŸP1ãã›RýÛl9ìD]1%>é—6õ°íß7Áül»ö6ãE›sÃÕ~éô úÔ ®úöG4„°Q±Ò‚z™1U³t…•¤š& N=5 {K îç\ŠÄn6‹ôÛÇŽÇæCO@Ù ÁgrW¼ó Ný›ÑñÙ2îT°Œº›Áß·IÈÍ1<æi߃ôHP#ÚhNB)•I#Eÿ¹ å2ÄmâÎñ·¡ÅöpˆÊôÅjüЦ†§%CðZT£6OÉHƒ,šâ—߉·“&u9ù].ŸúŠ.¿Ö((!/ÝfŠ­\Žê_ç#eÝ‚VP%¦ÜI7†ã%MFÖ?.ð¦ëøÜ#(¾îv”=ó2Ò?µËV¢ÓËOxÏëNt¨ÛR`|Ъgý` ¥4ȲÕ³1ìßòOù².»©ûïÝj#Œô­÷¢úÇŸ!ÛÎÓžGò.ƒZͧ ÂC eô!MK<¤qañâ›'!ãœÓ‘ºïh¬ß|j/EÇÉ÷a³tX(gêW˹܆'ŽGæÿu׬Zƒ‚“ÿŠ«~5EV$±g7ÔmØä—´fñ2Ô®X…¢käw/¾ SFí„n]ÐaÌH$ ÙÙ/­”4Åù.pú¬µ>­ Þ…Óñ-¦Èœ[q®‚¬*ç(Rnv É!ÓöNšªCð„Ä䦵‡¡âÃϰíº;Ð}á·(¾én1þÜj,Ò™qöi(ñ5)÷Æ E3hd‰Âò7ÞEÖÿï—†ZÙ¬k.ö«o”ñ«ÆyO±ƒð»6xP»v=6ï{”أ;*ÞÿÙ7üÛ¸’Ü†ÊæîSj׬3Œ~=éF¾ ¢a.8óÑ6” Ëû¯¢^ˆXåGŸ+iŠâ3‘¼Û® fÂáíZ!)Ù¯j<ÝU3¿A—¦¢Š.Fjd‰¯ 6ëò‹‘ÿÇ?É ·H?j¬‘WEú 
Ç4#LõÅ%F‡&eäîé|þs±Þê_æ1Õs~EÉOïtä3Ï=«©lŸÿ(ÒdÈ,˜T¾÷1:½ø23Ð /âÌ¿i$K=h?°×Jõ{æE1†Ú<>x~Ëá'!ý¤c‘zà>~EsAO#ßU—"ü¨ž·éÒóõÍï—AC`ë_ÿ_zihZz)}ÜaÞò©Å¨ßZè=æÎæ±Çö/ÝEÃhJÙó¯Š6jC+Å8ºµèpF£vÃL£[gHèÑ õ?ýjZýÍH¾›ñ;fDŻھ‚ÓÎímèöÃÇBˆ»5k 5À…ç_†äo?@Rïž $Êœè±q·ýüÝç}„ÿå¼û÷E­ ¹qˆË¦d^üW¤î3Ûn¼»YÚfpI„’¦8ßÚõÄÚ¦‰þ‘è\ÒªølÄr#”-: <2bgŒAË{²rh–Ý ]âÅxDN6‘&.!ãVç–n&t¡vöLò ¨rcJéS/Hu¹ô,F© uzíÃÆ!ûÆÿx ‘™–[~'ɇ´fÁï¨/,BÊ~{§=Ò;͹e6íq¸ÞTÝúFñ„¬LÃÖÁì…V}9Ë·8°÷ºé€£‘=á ÈúCB¨äã][‡¢Ý€œ›¯1È™_=pÚEKd8mŽž½ä·ß|¸>E†Rʦ¼§4¾Ê^ž*‹³¦‚Nj) ªüðsä=r—_ÛT[è‡ã‰]:£nK¾Q.Ö¦?Â[µJ ]:!ï±IHÈËAK÷¢ÃiÇù Ï¿yOÝ/ésQ'šDÞWsx®lò ȸð4”W êËo‘~ì‘Þº¸“Ô¯ñn F+{ÂåÞsž,é\Ùø&y3ÆaÇÚ@dÖ^ªŒÇðׯkɦ)ö›fÓåÝ÷¹ 1ÿtðý8àë1²/CßËîÊ–Ë:Î)à´˜5kæÌi}VVèÖøŸ¡έ3ûü[½#b_¿)ü²JMA¦¼iÑåó·Q|ý(¾ãp꿯½Ð’2œ} 8$g¾T™&±£è,¥GË4I;õGÅ´(ýßËÆŒ9’§n?~"C;£H ½K'?~€7Ž‹¼‡ïB†h%jæÿ.vÑsë^sá…W€ä”H “ô_PºýôŠK±õœ¿ÆÂ$±¾’¼ûPÔþ¾Ìø€r®â‘÷ø=^ÂÄ´u«×‰6! ‰ò‘V‰:¥µ;µ23•Ãu¦PËÃ!ÒÚå+ƒ&Ëú ‰µCeÏN«Iƒbó!Çž|ÑVUÿ2ÙWþY—ž‡ÂKþ…J!N¾FèHð yØTLÿзX$ˆÿ&j’Û‚¨¦)Îw‰Ãs±v4Ií–]ÒÔùYö¥0ú- %O>|²í,Ö0i7‰I¤š¦O?ý÷ß¿1Óí7ÞÀ–-[À…Ž#µi9›Ezƒ¦ð£Xø÷«Å@»RöÚÓˆ¦6ªë¬÷QúÈÓb§r¡£S‘~Lc/6uì°IˆNÆ_NGÚØÍb¼[0öp9´–{çuÞxs§ã3 úû9¨/Ø*Z¦RtÍ–i#Q·q“ñ²¦#ó¯g uÌl›x;ûöFÎm×6f9ºuÄ®‘sã¿Ùn´AâjÖ•— Ã©Ú,‘A{´±Uâì'~Œ“úõö«¼žN [ð2_%†á©¢±RqOjŠü&RÄ–ðuƒ°ø™ñ\—O§‰ÆözCKh‹‹Q75ŵbèMû§<1÷•ª¯¾5´LŒË½ÿV”Ü÷8€Z¤¼Gï2ÈWbzwÃNªäÞÇŒY³f~ã÷.†ªj3ÊØÒÖ‰¤»-ˆ’¦8ß%ÚΞk(\úŸ·O·LL–VÊ YI„-2íßË3.!I~YÒƒËÈ“Qª÷ª<û"a¸¿Š”†à´Û±#UenüH¿—v²MKMS$˨Üu×]øúë¯qóÍ7cĈ2bSꘇqb“““´Ýí%2Yz %bÐÉa6y¡%ÛU4k‘sý¿ü´I}z!÷ŽëŒž¬¯ãJÚ0¥²?’ô YK¶H òŒ¦ÜhX@ÆùgÉQÓ«‹=׎/<†j±Û0–jÌ ÇŽ!@¢Ì@æQ‡ë…™D™„¨ú—ß aG¾ j+ÄŽ%çæ § &ýs)ij#5 Gµ¸ˆèòÉ›ÍÊão=OÈOåÇ3Qþæt£ÃDíTb¯žÈ8ód#=5‹Ô2&Š-Såß Û÷š"v\²¯¸Ä“€qF^0!Yó•ä!» â­÷}£\»ßôæqmw솅šþß°u jÞoþâñE#}ÒL9¶7ŽßÅߨÎ7Mâ?4#M´¥¢]yÎŽ£É¡O-Å„};£Fì™8L——–ˆ ¥5èÒ!ÑÀö“•¥8¬¦¥ë¤WpVÛ‘yZ–ŠŸ³­ß©Ý S8†àáÿòË/?~¼æ†–$V5MaçE@Øaˆ:i¢WØ*¯F×LÞy 1]µ%4¹h §®RjW¬¿·!ëò‹š¦:ñ¼ùàã~Ü8twý\7§-Šá¹)ìg~+ÄÏ›*QTY‡¥¢yš»Efz%% ¬¦Ý2’,“¦¼¼<؆lÏ)À†i@Þ>â!¼‡íìF’&»~•袀D/¢³çb²Ö¡(‘"°víZäççƒïs¾Sé÷ndêdĆYü@IDATJ Ó8A¼|S»Oá>*30Ó™iÍ6ùæ÷3ó°<3¯YG|Ë5óqËò¸l l+9âÀÀ8^ÇO?ý„yóæá¯uç ˨“&Ãñ™RöÄs²8çF¤ŒäQ{ ýÈàC-œÖJGuµ+V¡è?7„)UöôÚ;åŸz®±Rv0›(ß´nß·c~üàl0˜òÜÜB”Èì¹KG‰'p›ÂåF.”±¶0¤Ç adòÉަÉü¡Ïž=óçÏ7ÂÆ 
;§^½zá†npŒTQÓ¤Ãs>7LwvŒÀ}÷݇ÓO?=z„ÙKŒ"v§vš±öæ±ÇkL(â¬d _¢ä»Æ)ãMc’nMRe67i2órË<Ü’ì0­I~¸ ¦©gÚ¥’T±$\¾Ç¬Ÿy’¿¯¾ú œø3fÌÐ-΀0lØ0ð]o‰:iâ’ ¥qˆ83«@õ¬±íÆ» ÕiŠ8J p§Nwí\—f›SÃÄ5Œ¥ìå7Äëì_‘ñ'ñ²ØÆ…WQQQXW1 7o/±7ÄfVÄ‘=”`’€è>üaÓÝŽÐÕÀå—_Ž]vÙÅX6åÐC5¼v³WõòË/cøðáX±b…"C¦U›¦ÐxO˜=K³WÉ{Jܨ¥3_„f”/pÆ“ŒšixÞW_Ò|!óåjÖÅ«oØ7¹onY6É/—Äa–®*b1‘À÷š¬ì¯Zµ ýú…^nÆJš&zð¹¹ôÒKÁõ-Ý&ß|ó 8à<ýôÓèÒ¥KLšÇ¥«&Nœµºø=|ýõ×Ákã{}óæÍ†3ãë¯¿Þø(ÆS¢ûuô¹2¾È<²üAÚa¡èš›euó÷.kñ\“xPöÌKb£41(abºêïf£ãÿnÊÒ†÷øRç‡$Ü1‹·'>­•ÇئM›‚&û'žÆ8ë°[±¥(D¶¡%¨”½* u«pQêÊq¹Uû•%ÊVt@Ò‘%,¾DÌ®'rL{¦ /¼\sî ƒòÖÅ15WÇsŒ7.Ò~̃õ”"-·-äß°aƒ¡¤rõêÕØºu«á•8ӯɟW³§iö*Ía>Ϧÿ-ž3{²f:žgZ¦ñ%?Ćeúövù^0ósâ{´,Ç7Ÿ/A2ó›q ŸuŠJyJžÍDÆŸO y™5 eÊV@/5db—ŸàÇ™¤p¤»Ø05 µè”nïVRÓDL2'¤éô`§‰ Ǧ©gÏžøöÛoÁ,{U<^·nñ!äKã‰'žp¤m,„ZóåéX¡./ˆö¯¾ú*<ð@ :»í¶¨Íã Œêr’;KïÄû²§Nj\ÓàÁƒãÝ” õ³3À÷ãgœ³Î: £GÆÊ•+±~ýz£S3gÎpé ?~ìùÑàóiH’PÞÞߨÅ3 e`#HJÌòHJ¹ObëK`Í|,Ç$*&¡å6Tzæãõ™šI¶ËÔ’ܘZDSóhÆ1 Ó2ððÁ-ϳ~SØâÀó¬šEbCaîóÙ5‰';æµ0=Ëüí·ß¼$Š;“41Ý^{í…™3gÆl2Šy}V¶C† 1l€öß+É#Nóî»ïâ‹/¾@´HØ·o_£“ÆûÎûFá}à}·ØûÒ†ÙÚmo7 Ì8ëTo öeñ@º H•õ‡L©/(4ˆ”yl›>þl=ÿrt~íi¿Ó òC«úì+c(Ðï„‹ø¢ã>\ù{öL¬‹/¾ }ÊpÛ`7_~æÁNÞ}öÙ3fÌ0 ùéÓ§ ä}9Ú)+TÚG}]tQ»#MìIsy>jeøb<âˆ#BAÕ&⹨3?˜n%M&ˆ§žz*¾ûî;ãCdei#“lpkjQø[¦f-Øû„¤€ä PÇôìÙsMG›³L¦`éYŽ/"©!QaÇûÁ„ÏóqËN"·f Ù"Icà9~9üòÊ+¯maïI;ߡ޼–k’¥`m 7Ž„„ö”±šÁk§´õyï½÷ìd‰(-íŒî¹çžˆÊ°’™Wúç1b„•ä1KuÒTùÑç†V¨C€æ¨|ê;(á5d_u©ßÅvûö¿ã`9·L›§ÛPxùµàB´‹"ùª]¾ÊXy;X·Æñ¥Á]<¤S§N†š:Ô‹(ZmâË/Áp„/Ž÷Þ;œ¬–òœþùØo¿ýŒ¡@KvDj&bAõ8' DSzè!ãyˆÆl’_ýÕ•;_L9ôÒK/ùFµ¸o &¢ö$R!à}6‡ #-/ÜüÔ~“`ùÁ[)‹ïQ†h ‰‰û­v+×$ÁÎE#ŽCÆ|Ó†”uGKhK¶R´®íŽ4±·RñÆtt8åðHO3誾þÕ³A·ï? 
ËU}8åÜö_Tÿ0G<‡oÃñ*peí¤ý¤¼ÜhÝè”Ë^‘á9Ob²¨‰²eHS–QÉÌCCeP^„†j¿¯,]xã´ÒÆÆŠ;=ô Õ4ï¿}ì>*¤PÚ„Kš‚çh{Ñ$fí]Î;ï<|öÙg©Ž6i¢*žÓŒ£!;í´“¡ŒFÙN–ICð5kÖ8Y¤­²¨‰sÃo’dÃMn>G¼?Ô±nÚ§ÆòÝEm5£Ñ$M´ß eBOü£þuà̹†;&ÊsOÓž†¤^=¼ûPc•lÓe8xdÝwX/œ2܇¤‰êí@IØi R'|äöxÄ@1M´3iYÞ¸Hwºwïúù E,…$щÞq4ÚÌm4T`Ìè¢1x´?b´O£At4„³å-Z¢-“D¿ ~üØ“‡ph,ÞÂN65MnÞ’ûåË—3xÝÖ>vl8‰ƒ& ±Úx}òÉ'†-^´ê£‹‡ßÿ=ZŇ]nÔI[ÖáÄñF»•;pF’&s–‡ße IòäE×'EïÞ½ ÒäWo h{@[7J,{kn¼~ß6qØ–3æ¢-Ô²ðc ¡Í í÷Øc ‚ŒF}‘”ÉN ?|ñ"MÁl˜"¹žpòº‘0™×1pà@CÛD·'nv<8y V¤iÏ=÷ÄwÜUbõþ±{ñïZØmñ–Þî𜓗Ϟg ÅZHšÜ¦â61 1k4m#ÌzÚ–Ã5v†ŽÃ½&’ÎV eHn¹f>­Pcævá”÷Pn@¢ÝvËXÜëÖ®ƒÄ-ج½ÖòÅâ<ŸS7 ;À±|vˆßÑü]ñ™ ªPˆó PÒçjx.Í2{¶±¨Ë·jsܪi¢Í‚†)|ñŠ×>‰m¬^Z´_gY+ØPÍO{!+mdšx~”ÙQf&`µíN¦sëïïK®BàF‰Ç³CãxN²ˆ–(iвm¼\>Ô¼ÄCøC‹‡¡{´nÕæPÛ¡†àO# =Id,„6Ñê)søÃ^n’»xµ“C!œÕêq«¦‰÷'ïK+÷$`Îø¤o«h ¿n!ò¾×›&ß ußj\âõ²¢íD´z÷þWéäæá9µijºWô~-"ÓTKã^4 <í=¾ n¶“3’â!´%r+Y‰Áê¤}‡‘Ý(ÚýàƒÖÝõ4o;}k±CAò³R—ô*`väi”ß]ÂüW] Sá·ß~[â›d ~À*Ì7"zbgtE?cÕ3WŽ(•u&¸’„)™âL¹;ʪþpøþqƒp³æVI“‰Dœ¶Ô¸Äª7x‰æŒªœÙS‰•°÷àÖá9%MMOÆÂœ5òù‹V/ž¶{Ñ´½hB,²=bC¶šï>ûñ’7·Ïц‡ŽDÝ(Þµ–hc{¿ I9S›$ˆäh-¦¼R‡3ÏwŠâ­&ÔÄEÛž_6Û…³jÝ0|‚g1™I,oÏÆ­8üÒ“4¹‘¤*iò»Mñ90gÐQkál ~PbIšHÝ:<§¤©é $iŠ•”ËZDË^„Ãs+W®lº0—îqø§eCcjÌÙŒeßtË!TÚÚpY$n×J ö€~ŸK`órøÍ×Û™rüŒ„ÆÜ@ÜlNҫ߃qSlü#iâ* ¡EüÊ¡M2ÇØå< ¦4ùôˆ ~HÒÔ”¯iD»¿øø[°`ö؃$,ˆˆû@|&á |\¯•à¯P’©ÛGódDÈ?â­¤ÉDC·~˜3謒&ªÑÙ«`ÏœÃ'ÜÒ«8íqxŽ~î›Çc\``#øÀßtÓMÆr$¾bo/”ºžçh(Ì޹÷I†ÌÀëâ>µJ ToóCÌ!A¾|HžÜ6‹ŽmâuðÚÝÚãõ½OÑÜgOëÅBHڣ哅/_>£4j·ú‹Å5ÖA¼)üP˜ûþiî’OËíF”ٲ͕@µ-èGÇHØÂ?šìÕ¸Æ 5¶"Ï‹F`)fIÿ kTöÄ`ù&ÉâÜü*ÊÂìÆ_ ·bÆ›aÿ?ŽBÇ.y²Ø·,š+‹~S8t3#ŒýHÿñÝG×Nœ?>H>o½õÖK®„[ï‰[I×dãoÕŽŸ+’¤oÅ,ç–pJbÞŒ4Q¡õœ<Ë/Ycdÿ&K¸JÂ},¿Ô<º­#«š& 7/ÚIøpP-O#P®¾MRARÄ¡Žéò%Ê}nÙd¾ähB»$¾$HPøa``y&‘!)áÇŸD X0?&¾«Ióz™'i⃊ˆ1?N‚c[¹õ üaÿðØ0a‚ñc`=üÁó…Dç~¼.+sŸ[^?vf ù2¯Ï¼&^§o{ÙF_¿3LÏv°Ýf{Ø>¥›[öÔH"9#äâ‹/vtàh?CÑ(Gž#6¦=Š›IÛÉ©ã|'MLÑ( Y2µNflø[þ>ØÉ²"Ë0?ãc#©¹õÍW³Mt^3€ÕOʰŸp»‰ÿŽØ$tÊGÃp.Ãy!H“ÕÎ Ûûì³Ï¿SjÌ;î8œ~úéxã7Œw¢ÓCËnõDìù¾ã²X|¿ZYÔ–£Œ—_.„Fxº,µ6iââÞA½ù?üü0’ÜÚ´ü r˦³w> |gó=ïqOKÜ‚HÚAâC©¢nªá©!""‰¸à‚ @2|à9“ˆ?÷Ñ‘_ f ‰ày¾äœ’†?þ|°ÅY*㤓Nˆåb»¬Ÿd°°°Ð ˆÄÊ $1:dO’DÌ &éñÕœªõI_*ÄŒŒÚ/~<ù!¥‚„“/D“ˆ’ŒþóŸÿ´t-;r">k¼7Î 5´ á‘|EÅ ´Q’ѱcwã·°=Âñ ï/É1ï­›…˜‡«Ý“¥À0p'`ÅJzs·w•f§Â^.ÿÔ¥‹Ä”X´Å¿E³0âöë üx2~»æÙN8ï¾û®ãÚb¾'øgû¬;³M±ØòÁ÷¨ÒtÖYò¬ 
&Mjj5Ov…ß§©S§6Ïö/!L¼R-ƒÃ3Œ ½1mvGIS nI59$A$CìMòa35EW]u®¿þzpɇX {h-ÛQ8ß"’’ _D ´iqƒPËDm{Pí]øþŸ ÐÈÒýäÿß% QóA'’K$Ði*mihgÃýU2„ÄýæâAù],h&ç©w+r¨hFûUÄk!ir»ðƒ×²mJè+xüñÆsá|ü¨©õÕ̆®%ø™U2ô2÷`ÔÀлES²½-¢¼ª•Ç‚'§åÞ{ï úμí¶Û Mv4ˆ I‡èø<¹MH¸ÙálIøl$ŠwÆ‘{Ó¦5¥hØ45ÅXÛã÷ƒ ÷¶*¢yÌàhl÷VSº>’¦Ü¢ÖfÇñè´*ÙêeÅcÆ5D$J*îF€ÚjäH6ø±h’™B€fm?ä›×·KÙ”ÊÎËçï„=yjM™Š;Ä|¹È8ìŠþHAšP³|±)KžÍbê¼Ëñ‹P¶B3‹XÖäaŠW‹ÕMCÙàЛÍ;$MÔØ•/¾“ïÅÀébÔŽ»%þ#!•ë_zýèq¢Ë7ˆ"k’çïÄQ°Næ‹/¾hhÇïDÍÊ05–±$MËÖ–"9Ƀ¾Ý3šµÇ7‚¤©µïÈÝBh)³g7nÍÿ2 &Ú3óÈú–8pè¬U ×›Ræ e"2Jš¬?QKɆ³Ã ֛ʉ9Üej¬ç/%{C|ù¨¸³÷êOšZn7ר—C†½DË)ýÏRóJ{>Úè™ò>j´Ê<ôn«¶å˼½½Q-î°ìXkT[lPˆ“"¶Kîè¤zâDàe!.ÿýoˆ‚[‰&9n­s×R{} ŒFå±7YLÖ¢jw²³*FWCD[°§žzJp ¢$±œQÊK a:ëºï°Çι¸úì]1 WfÈ+³bƒxÚiÀÕW/" %KâìTšQ„”ßiµŒÆ§ì¼Þ¶·lk—èþöòƒI//Ò+¤¶)VÚÐŽ‰öD*îG€q»ÏæeæËٮشæ»NfålzøaðÍ>2‘> ÇªN>ãÑriªÎpâ9|ßÚK`¹$L4=ãˆQ¸>j ¨W ËY~Ðé ™:¾³õRm­ç„a'yî¹ç aÆE f0_^YÛêðæ×¿lÁ³6àãï7bC~…¥®Ý\ŽKN„Ç®´Ô–íXù[mMÓÊÖ-Üá9^IítCÊ{B˜Ž y¶Å$Ð$Òn%M.¸TqÚý09Ùl~PZûX9UÔì¹U"ééºõš"iíñì :´NN¡ßûµ¶Fl¨¹øJl1Ö>/¶3¯7oäè ¡öÊÎuX(2*Iø.°«iÚ{oÈdq@Û8Äq"Y F½Ȃ/ÂoÅØ8ò‘[KM †òq1êúóŸeŒ0òwÏÁþç~Šw¾¤ý^p™·l>ýQ\Äl­De•{YƒöìŠ?ÝßHÙ£sË?*±†ë!Üá96Œ¿­I“< 8˜)í ;Ù‘ØÚÙ¯±õ:<×:FQOaŽ“G½¢°g-oÌU’rÈÇ­B;ÚÕ¨4"@ígãy!4\Yáºëì#H÷-›÷eD·÷ÙB–žk,»T†­ _ìv®Ãj¹N§³2ÄXçyç5ÅPÛ΄GöèÃkª½q¯FLÐß Ãrb˜ž#ÓEjèè3ˆ¤éœsÎÁ~ûí`‡ØJgŸž¸,—YÒBˆBÉE' uÊ‘xÚÒÔ"ÖÂßÖܹsƒWû«D'Hüt[Œåå¨Äö.9>^Bm¦û€XÙNÙmÓG:LNnÎÃYVɆ˜”`áB`ìØFM“Ýwkš¦ÃeòI˜ˆY¥Œ¤v³†‡À[º°VRtSñ]I;å2Ã"M¼ªH†ç|QYó? óa2,7Ø7ÖÚ~$šÞï¿ÿ4Ÿ1cÆg¸q™ûì3œ}öÙÆ{íè£6|½…×¢¶“‹÷0\-9«v¯&2|G„4ïàDÐŒe$™®Ûø!@Ò¯Ùs¼jª¶£µ„E ªJšq÷1Ÿ +š&qo…×ÅÎÈ×÷‹Ý+#±±JÞKæ‹ÃÄ5âDñkµ°lj9ƒ r}íEj¹r©HÚÄX"%MtÎ ?¸œ ‰4lKnv½UˆsÇðJ t9^)bW'×sÀ`Ÿ}ö‘ÙbWcåÊ•Žø^s£a²‰5Má’&ù™‹sYÈ fiÖ·-Ú Ž¦ŸŒï'åõ’ÀŸÔz 2qAÖÝiônñ7,©]!Jš\p"}QFz ‚¡·ò@©—`ÃFŸØdéPÕjNf¨}yÏ6ȰIÃr ,¢Æ'½ü@»†s›âØ“´²”Åû’¥ )›ß^­Ø=#Ô åE¸“Ø1 ÛsOüIf͈9ºJïFØ¢š&èZ_˜98ä`Y“S¶æÄ%ÎÆ±+VæÇyÀ?åù²ø ¥i*.­ÁIWƒÑCòÄNŠä˜2¤6®:kWï%•TãÃï6"/+{gÁ{2ÄÎÆ‚JcÊøÞÃ:Z&`Ä \c^6ƒ ÊOýû3ÆšDêr€µlûI^ RÞ>Öê –Š}'dÑ¢E¸ä’KŒuèœrVëfó‚‡;Äʉ“òjKøì°“™="ô´AŠ}RÂJ ´§G&î“@µ áj[(2âS_9×£´€@¼5Mì)‚©½K¼°8¥;äå ©ò%M|éðÖš¼( ÊÄ…b TÈ}߉º=]¦¹öØ*S]ï’ã—~ü#¶»DN6 mävr `'Ǫð…î‹Çjm)ÒtÐAâó[^~òy…%øñ¦U±ú; Ëdéw=ÊjÉ$Kèj2²3“1ã? 
¦VÖY£ŽÎ¹©2#$ØÔ.qæ¾}€Ç“NˆÄÉ+µ5Ixþùy›“bšÿ&|ˤ»Î`4Ölö=Áýî® Œ´vL"HÌÝä¢FI“µ{ÕTV?Ñj„]oÀù⑹,}aJ¹¬%ƕͭH¤/E2-k°,öÛk»# ì„]o¹ë_{Í[}ÙbËì_â{i˜$ìlùíÉa|’¦qb̶5«ñn“,#R^Óû¸PW;,{dflùšÌT±Ý*iŠ-ÞAk³ú±03³:[üà'NqÌ}>°|áð£Às\NàOú“™¥Å-_Îv†.¹¸·ÉŠcÿÀ ¸V´¬uâZÞÚ£D›&«ƒÀF—-_.½Öõ^ÂT% Sºs v ×r@%ìéÇcÚn@3\sHɘݽJˆK.[BRCì[Ò²ˇ™ëšåìa«h#±éÝ<4Ù/©åÔVù ‡ýìI?~V'3d,û¨Z7ÈT¨ì+a°vväæÈLè•„F‰ÔV‡Fă¯—!Ú!B–/•{”&Ãu?7zlù²¬1飅4ë ¶åýDÓtÓM7að`^gt„÷Å­š&“l¿òqÝôƒ¼îº·p¨ˆCzñ^HÈX³sµ'CXœ²´¦¦¦©)&þ{Ö¾tñoçÝ~,¬ªU ÄN;íd¼Pÿõ¯3MhèÈ |ñÑáëb‘ûã?Z&MT}Ú1 L•>ÃÑ/¯ãx÷ãNÜcùþXž V`ÉüùÈ=Ú{j6»Ö"™5ã”ð…É´]§Úá¦rhó¶Ntöƒ ²Ü,zær*{íe9‹‘ÐÔñ%Lj·ÄÖ.!i²jhNùNå Ý»>Búéì™WùU5î¨d9î,ATN†U É·Áq”~Â΂Õçž "“Ì]Ã75jœhð;àbº"Ã>v$Òá¹h&j Ièi÷èF ýÜ4o-5qNŠIšœ,“eÙ¹&§ëUž;ï~¨ÖîÀñìUr?­É—_~‰¾}Ùs .<ä‘G?"–½«õŽ#eAÔq.Áþ8PÞÖ{Ò$MáŽO7ÈKké=÷ óÁcÖG`€yæŽ…Ê =ŒêäÈEISs éü”SŠí&Žž¾ðBó²Z‹aÏ—>ÃB‘¦ŽûËÇør±ß9]4ŒòÞO¥IZ/¡ :·$M‘ø@j­íNÝ»>Xª`p^¨M´jÓtž—ÉO[eP¾Næ”ʶqø±Z]M×*4Œ•±ºí’YôY ]dE93ÊØö á¼ÇICp¿ 8 ,Üw—Õ·ZDËš¦V³G”Àž3 !ù¥,Ët#Âo Å©±æ7€[ÞkbJ2ʸ}÷Ý'žx¢Y„±UÒ䇸"@²ÄºÒÔa¢V€¾Iì?(|¨­Ô&ÎÂé8 ã<œ?Û©Æø‘°®p¤‡ôŽ]y%Ö¾ò öy÷]t“k\ñè£([²Dº¸bÔူÇÍÞ¤J$MVÖ&×8‚xí–3ÚßÊ&yyBž­Æ8ÎÒiiz’¦–|5±”ÝîÖ<'e.”›5âz`­8~X--‰X[ Mv5¿-\²åSü8³Ÿ,äÈíC4$RMS4Úd–É{$öVf9ÑÚ:EšøÐ.Œ&ÜçuówÃ}’Æ›$ÈLÙŠ$H¯È»™qì|ò=ÏßG?¨` “Y>küó9çû–˜Ò `àÀÍ QÒÔ 0 Y჉µßqǸí¶Ûl;ˆ£]‡.8“®5IA ®Æ‚ýði-¹ßyþð8”® ¹ùf¿¬™bõºé½÷üâ"=0{ÝnUÃGz}vóó™°BšdyL œrŠO ‰òâÌh–ÌNËÀ˜1ƒDë0Vð9£]M¢„¦1α2„ÖçÉfSøÒ¶R¶ÍbOÎ玿åX ?¸v†è£Õ6’&~(Ý(víúb} &٦ƿWzWç{ö®ìˆñãKxèƒm‹L¡ãyþ6˜†Ä†¿~—X6·Œg™|'˜Dˆñ4™/æ´)c§Þæ5ÅË–êÓṖЉá9>x‘’¦gŸ}#GŽÄˆ#l·œÌŸ?«ò ¾ÂPYPh’X4=§­f3z+ü±9%™C†`ÉÝw;UœQ{=|Aòž¨ˆOº^½œŸ¾#Äçsf„&­Ü1øò21# ¢6±¿„ž¬ í§|5MœsJ8'[ly8ü“‹nbê¼vÂH ÄžA³Øp´D²÷mu¨Ì©æÐ’„%ÞÂ6¸Ñ7qq³8ÛG¢B­µ<‡rˆñþb'•j}øž'A&±aç•ëõQÄßßwLþüv-X°ÀÈ϶8%ìÀòšÜ$Jš\r7ø@óWøÐR5ÊYsá½UÒ&ktã}LÚô=þƒ«ð¸h›.’¹tV„=h'ISº 5È‹¶BzVÜwBˆ¨JšÑäl3ÿ…tÅŸNvêfeðÙðB{ó ƒF: §ûL›nœ¾,Ó¶, I‡®Ý.üøÅƒ4Q3oqËðílx¸eŠ„i‰˜°}‹/6`"^ôíE’Ë'Ó±Óu衇ÆJ’%¶—§œyZ½’0«ß«e2µŽÄßM¢¤É%wƒ/ôH†ž’±‘±cÇeÌOw4ÂãùñœmEX?Ç¢­ÈO˜-“•‡³çnÃ]8[,›Ša' Ä[Cötœ”Ü1cdzóÏŽ‘&¶Ïm½'ñ²[{¡ÁœŸÚ-ÇJz¾|}5M‰âß% $i‘K[Ñ4Ń4]’«C+´ýc'ˆ[~°·$Lá?ß@òÁxæa0…‹î’ØNŸ>݈¢Öƒm X—oÖÍcƳ^ÖÁ-ë6ͺ¹e`nYÓñ˜õÓÞ†[¾׊3ÝÿnwoÂ÷«Ùvv°h¿Ã4sçÎ i"YcûcM˜x£ø»¤ÓoÞÿÀ-qç=t“(irÉÝ ¦É iâ”?Vþ8Ø£`ïç·ß~I 
ĹÏ!:~è˜ÎªØÓ4UÈpH®Q´Œ~‹ÓGpŽ§Ý°» Æ´¬íá‹ËIM‘3|8Šåºé¯É 15MN”µ#”ÁgÉÊúsN\+Ÿ ~˜¢!zˆÆ‹Ýé¶òcm~Œ.Û·<þi¼K µ{ì(L“ÅÏcž#fæ>· Œ#9á;†#Ô–×âK>¨ áÇÝ ü(’à˜²páB£Îo¿ýÖˆ2‡Š˜å°³LÆñ<Ë`¼g¦§¦‚ç};E,ƒiÌtܲMfùf{|·3e½ 7Þx>ø ot³}+ïñf™ˆ0±â}1÷(ÖR$M|.œÞ+>n%M.¹|¡ûö°C5ëp™%Æqiúq¶{ÜçTÍ{d:~¸Zö”¬ªA¥ßgü™mì+Ö$c°7ÈPJk¤ÉìšyØv'm«ÿúW ¾æ'Š3ŒUÓÔ¥ùÁá4ÚC–,?ZĆdØwè¯é ݱg~ìøA§¶#\a^®%I7Vå>5…f á/÷ùaeg˜›Z8:Æ%Iæq?q¶EÌHdùQd0÷¹†Á6—?!‘»ürñ+á2aG•x´&|—ÇKLmS4îMK×Äg(¤‰×¡¤©%äÛñ9þÐV¬XÑ*ãÆÃÉ'ŸŒ>}ú½%¾¹¶— —0±RæåËÔŠ"nòÅC²ãcâ«éb#Ë4LÚ´o«Ùù¢vÚv"] •“åå¿í—_†|`£ùqpÛ5°±>¦±(Ÿh“&~Œ£EšXv4^ìNÜ S{ÓM–™FšøÁ&"Ùál( @SKÄüÔ’(ñ‰g8шŸ÷Žï Na¼iøø¾à»åŠ+®•î»9qIa•A ?ünv*©‘r³ðžr¨1Ö¤‰¿­htHx=v&(ÅâÞ¨¦)([¨ƒÚ#¾[z÷•'Ÿ|R¦xŸ±;|~ W®\é[tÈ}º¸_|K¦EX('e. 'Z0ކ¦‰ í&Î6¼ý¶#¤‰?T+÷"$@;à δá›Þè£)ÔpDKËG-Umj4¯1XÙtì7`À ‘Ù _ý5æÍ›‡W_}Õ«bGƒ63$=4̧†ˆX‘1÷yž>µx>œŽ ‡¦HÎâ)$Ìü»Qh[k2bnvø¢Ý¹ lŸJÖí$Füͺí]¬¤)ðîÇé˜/?öíÈO²¬9½Ó!RáŒÊ™Üýÿí]˜Å®½Äœs*(("æœP ˜0c~æ„9'Ô‡ *èSÁ ˆ˜PÉ9ç|ÄK¯þÙ뻹½™Ý ;n«î››Ù =ÝÿÌtÿ]U]}'ÆB“h==ÇH:ÿh¦à9Aeî…½½ÎÙgÓŸgœA‡¯¿žÊLÈ †±( ¨'cY¿h_¼£x~á$M¨„4ÅòżA•n—4á…ÃsU ‘w+xáK˜¤Šü3M“1çã¨Ì>žÖ‚ž4ñø…ƒ4é?MÖ¨Ìt žˆ„  €šË„M/©ŒEƒ!ChZÇŽ4Ð&ùÔ§ƒm`ᕉ(ð^ñòÚ‹H8ƒ£‡ Öš5k4"÷'ÔôÊ$…ßxFèýã}G£-!žß <ÅN`o¤)VÍs Gg3ñ‡Ì;W3‘uäw9’³X,&<§X”xÐ4áFk´™ª7¡ñ —àÒ.4KY:xÑÀÒí¨7›7o6@¼‰BÚ/œ<˜ ‡ÉÉç©0JGð©à½Ð¨£&ÍúkœT“%Huíñ‡øŒ™Xà‰…ìºìrþñ8:~ñª×ŠgeqòùâãÕÆU+Tþ4­\¹Òöñ>CC(.øã`Á# àŒJï>4LØ^¾|9 6LëÕ+„5LRXð­dò\-xgñ¬`òǽàÏaæ¡FF`;V=õh4|hpñ¢)¸?ži, Þ/Ñ4™?/4ôBšÌñ–#Œ€šãËk¿#°1áb M‚êð±"+Pcd”†•}V}šB‘p5,Päë½lNc‡x·>T+#­”·´œÒ4cÆ ­8<ð€6rï)Þ"娌mhˆ`&@ãÇbøDÁ̃w ×¢qDE‹ZSå§Ó»woúàƒJhŠÂ% :¸W¬ Þ_4БÜgT±‡û>nÒCà L–ÐGS Íñ²Så¦l Ûá蔹ÉC¨kñm†òK›Å‰Œð –.‰)p”🅠”.6¸ß–‘[Cîü)”3õ¿Å®8”“G]G¯ •»³é¿ëÒàv%c[ù’s5Í^¬}¯IÅJ"?¢Š&8ÁFK"1z&–+E…;Â?iRhø×0­AûªB.~•³_Ð0yU»Q¢ÎJàî*˜£¡ñÁ÷ï5!… ê‡X…-X¬“&ÇÄãjÞzÛðñð¤4ÚÀ ¾ÁR|NÑ%J‹[´Ç|ëîi[¨[í²t}ç*ôï6sS4´Ÿðµ3"cæ©{{D4MÞâk+u4LëÖaæøè*k4(ð1ñJ¼nî¼B)Se¸óîUz¨¼€ H½×Á•y΋²àÝŽuÒ„o$QÁaZŠUÒí_¬šÕ·‚wÇvÇ&/Ÿ& Œ8sèu*)aÃxì‡9;’”|Bg3ó\Q‚þ­§þØFj¥ÓUè×uû©Vùà4DYA~,HðÜÆB(hŒþá¨ÖÑ&/I“UGðha€ûâ#Å/‘âàý„Ïפ þF^™Š¡M‰†é«8’Á!ÑÐ4áû·ƒ { Ð~^0‚•ØjmyU(»y ]ÀÍ…{ˆþÃÛÁX'M±®ii²mbåÁ3®Z³K¢[n%ž1™¶rØh|‘:Lå8´ Õª©{’%7ÍÌsú3¿Xº—–ì8L·w÷»hô©š)Ò„°<± ÁÞßXÈ_Bå‘W“•ZQBê¥@ý«=IUn4ZÀ‰XïYªQ»vD÷0aºúgÖŽò2K'€PƃkíNÁ ׳SÚ^:rî¹”vÏ=DÇk À“à 
k6åɺ½Ùt€Ã ´®nÏä‡{ >0K70‘ø-¤)(Û¸‡¡$¤Éhœ ^·Yˆ¹$ñnzEfô…u5z./‡rg}Á&¿1¹Å1äkر0yƒX7ÏE+vÁêþÅKØ<<±`º7Ò2>¼òñ¾Ï/¤ÿÝÙ†N-87\„ ÉŃO“ãNÁî¡´ãgÕÜKD#F Dtæ(Z[4™iˆl?D=ê8 ZŠtƒi°¬å.|g i –aI ζèÍwêäW{«D1Š#Û”½jXµñM –E/k4 蹩ß8Žmµà¸Z¶ºVWkœl4†>*¯jm䔸Gf<óÌ3š£µÑqu­ÝtqÒÃuè b™k4BÀ¿±`£`ôå Ì Èœ#Q‘g88BˆgƒeàÀtÝuºá¹*ã¥x Ò„ùá¼– ¤ у•$§ª-ÍAôryŸòv`Ì–_|eÊQ²Ž4…òÛQ×Es÷ÕM@¿™3gjfå† Ú*F(Gp£ÄŽš<‰~á‘W½¾úŠªòÚ “âÔßqßø©yDó˜0…oFÎ⥉Ÿ&ÔÝŽ¤Gw¢?"šõ}ÍÏò´Ûo'zöËIáÙ™‘&±lVÙ¾– 7GºÑ úlTx!MF¨DqߪU«è…^ ¡C‡j ±qÐ`ÃÏ$¢oäAð;((Ò HˆÎiP‹:i(R¡ÒAϸ"gF°#7FÇ&Ô­h¼ŒŽ«{89†k@þ° W¨'‚j?*`ˆµ¾\z<‘¤)S¦h x ð(-fžÇ47v$U®x^G]ÓÄ~,‡îëZarûÉW£å¯Gy+ÿ¢ü<ÿ·Qx‚ÁÞ;7„Ä É°ïÂwŠÚ©œp ZÇä†n°•zôvã“Õ<ñD:êûïiÑ}÷QýÁƒµÅÖMu'£ƒâdH?h4ÓÈ3„l‚Ôź#8êuÕFè µ·Ù½ÆeÝ{ÒIT /õ±›‚¤LÓT.ÅGYÙÎ4’ Mbž |¢:î¸ã´ÆùŠ+®Ð @ïÎÈ|‰†=ÞeÉ’%tÌ1ÇPûöícº(o½õ–¦‰²ÛðÄt¡\fï Ÿ—`·ÛÇ¡³R±€è¢qÁh˜@hAŠ7¢<2J'«§KOÏØFïÍÛMk®oA5CÉÃ¥øžb©Ö§påwJš~ÿýw­CàD ŠÎŒ™¦ 0sUºu£®~H3N=•ö-\HmžzÊà¬Ð»œšç>à¤{3ab7&Ï.tžÐ1‹e ›i75…*½þ:ÑcY&Må†gh$u*¤Ð/ëŠæ•Ëãï{褔u$.h•A§5«H)&Á3…4!*û ÀD¨0¥õD¼ŒR(O>ùd\” š¥Å‹ÇE^#•IôbQ)£aE¯R/ð7/Š¡i: 4„¸——ͼ€0e ]¯Í³[½Åû•@«­´{F²fÏj5r9=߯ÝØ¥*äø/÷iFƒ HtƲ€49ñ§ƒötøðáÔ¼ysðãyÙø9%”eø{éýë¯ô kªpÝU笳ìÜZ;@'>MŸÎâ¹Ôزä¥@s]œÀ{y7çi+m»óŠ®<0l•ëÝ»hGˆ-Jà„O ¹ìÆþLÏÏäIyùØ‘Ü|zêíôÏ–Côrÿ:ôöœ4ŠGÖM<·á„4Â";ÐÂ`‰.NLÑÍqdî1O’¦1|ûˆP.«ø7ŒO›¿ü’²XÃXcÀjGS ­ü§ÌHÓmUøÆ®Õè¶©›¨ OË`EP;!$VÒ×9NÍsï¾û®f>Foš*4bvÏÔ)iÂ}’Ù¬xôäÉ´vÔ(Ǥ æS;2OFx9Bà©@ÓÂë Ç>Mú½ð¢s©Ý·ßê÷†ÜÆóq |Ž•Ë$SfÅúsÓAêU·=Ò§¦¶ Á£ë•£Š/.4M;ÖHSüÛ{L¡–‚€sð¡âã)Žœ´­˜è6³cðŒÓO§ý¤²ÍÓOSŸ3h%Ð;¸~}ñM~…ºÏä Ò£½kÒîC¹4oë!:¾ay“”Šï†ÇÌ„PüÌèýrbžƒ£ìÇLwÞy§Ö›×ûëY-I8eÖ²eZ¬&«÷ÔŸçÄ| 'prE}*ÞlC -«—’u ‡æ.ÝMOZCü—¦Î´¯iråÓôÛtÊ=ý âQ0ÔxÎ{ÚJ3-i2›Þ†v®Jwý¸™v,î¨MÓÀÆæ¨XòCGp/¿‚R˜ö¶]‡èÛé›èòÓ”J),$ #ëUÌ¥³¤öJtd[AR î¾›¶²Æ¡û'ŸP…- o€ÑUV£C+VáÅjΪŸVì§>™å-OËŽàÐÙmøž}öYºà‚ ´I•q=Lüv%TpB³ôv²UV÷€ Ͻö:ê7Ÿ½²̾ŠPÉÌžKtsQD‰P§;>Ž™¹`-X¹‡6ñðûª•ÒhÈM(-µHoqÏks©ûåõh[ lHì“4»š&oÌsê|[¶¤ä[oåXMÜŠýšì žŸ™&·'4LÇÖ¥þŸn¤ÛOéJ-eÒ—-§¦.¤ oÝLɕʑ/…Ë\±iLÓüš=h@­tÔìæÕéùöQqz'¹Îs¶í:LË¥ð”Aözv2¶sïúþØ"Msï¢ñSÖÒÓa¬=á|¿™§‚)ŽÈL¨ ¬r÷î”yÉ%Å4©|-¦Ú°"¡4M*¹¬eêZÇzc‹Y¯õð‘\Ú¾û0Õ«Ú ““›GøÞêT·gNRùµ†‰ñè£ÒìÙ³ Ä ì0ê~aJŒüLÔ1µF'øù¬©sŒÖ9Lv¾«VªôêIýæqÃÈ’@;&0¸#DTQ)ÜÔâ% Mn}šöíÏ¡ré)tvßLÚ½ï`.îTþÆ=]-æÆü4¦P„¹_^‹É¤>/ð'„ß!"›íeß´¬þýµ‰˜ÕX7Ðo½{w0Ž 
b¨¡Oé{¥}1§BtjZYj¶z=}ôÑG4gË6ê|ÆU´çÃÉÚ$éÚ ÿ¦ ‹c×älœë]Bš\C; Œ›¼†ŽîPº¶®êY¦×­À$:ÂÃGõ½$Ïnh!ádÎZ®lx_e|¨×^{­…»'Ö) M¡ÍÕ;ÿü lž8‘jrJ‰ýf;¬3\ûãšýt^ËJfɔ،4õ:ŽéXn¹¨[%|T; xÏw²ÓyóEætNö 5¬SŽú÷¨MWÙ¤Ä}ÔŽÉ36Ó²uû¸±L¦FuÊS¿nÖ¦‘@ÆÇ iš3gŽFš®¿þz­ñY½z5aÀæ „Æ #ê¦M›¦²t SÈK ÏZ°‹j²¯Úñ P:ÏbÊfm§²†çnUÐìƒ4é%‹ ñG*Ög¼Ó§`¼LÓôúø¥´b} :¹uoc^÷žÐÃÚ³7ε½pþEšêrRXô‚ÚÓØ [Vèmh+KŒüäXi„¥@Z²6 Dߎ`d†`o´%¼-M´K“à÷ošYþ˜¿ÃS҈˦%zÚ±BšÊsï­I]k>-V_‘ú<Ó7‘â 1µË'—’7ó uãÞ¥UÁ^Eg6¯D—¶Í :LTÌ=a3ÒÔ‚µI/ßÑÅìÒÂý˜ c„ò;éÙ®a±+h Kô؃$Ò£Œ.~viúôét.Ï%fWjËI™ÓÙÔ3ŒÀ@Ã…÷ËŽ`Ê»©;‰.gOënì£.æ©;[~’E7±f|„}øU*…ë`š¦;.mUx^´7ÂÜÒE!”†ÔE†—Bj·³f˜PvÆF«†‚H,pïaªî0T½üjW³îCb']97ö@£3ŠUÉæðëÇŒ¡Î£G[½D;÷±2ü½"k=ßX—¶°¿È[< §É[Ëhñ5ͨa†±-Ò•Ê7 nD¬;vl°äMAÃm•‘ [R¯ÀÇaâI“Ú¼à8½·¶‹Œ3üÃDœ8‚g‹×€¯riÀéÉÔÓ….úÉ/yÆì¹jÍx˽ÀäeÅTêþNîR€¦',!fdßKM“Ãl…õ2!Ma…3º‰ÕgVø_ˆ^!`•̨û/¼ÿ~ªwá…T®= ;÷A$ázSéñckÑ +ÐÕßm¤)5RY(¶†ù½ñ`šƒbD釛Æ>#k×®Õ|“ì‹„ó¸YðÏþŒ¯Ä®:òQ“— L˜Þäjﵿý&¹Fˆºñ·4£b \z¡ —ÚÄiŠ‡Ñ´ MvG^Ú"ÄÉv5¤!’+<ŒÁ v:k…z°Qìò }I2‚4‰Ä³±@IDATf›:AÀ+‚M•xÏ-<'Yξ}ÔÈÁÄÆ0ÏYÑ4Þó±é[©RˆÑ£Š¡‚UV#”C¯YÞáÛôóÏ?käÐì³ýpŸÁ1µ¢!0ÏÙ%y¾¤djR¹6=‡É›•!_Åä«Û’|xØzyv¯R—’ª±ob%fQ¾`ÆÁà%†ö&FnÏ¥NR¼h‰›÷6Xžá‹hÅ\,pÒ.$%A @¥h¥Ç—ÏÚVñÜU­N›£ˆHáì¼ç°zòmT¿R*>µ^ÐËTÚð߉UyÎ,4‚•Ò„˜FÑ}§Ò`yFGÓ*Å‚iŠ…§ yâ`qŽôEØÍS0lùî;Z÷á‡ÄãÞ){×.:ÄŽ¹yìPëcóQzݺ”yñÅTÆdB^UùÂ,ª±zs@:“ SŽ…¤Ä—Q“RŽ¿–’{] v®á±cÇŽÂß±¸òÛ=®2ÔáŽY ŸY¬šÀ¬¼‡ÑÀ,ðž0ÍFÓ§ ï­ŽàÐ4ÙµˆM¸~ i ’’Ž X%M-ʱG.ˆHS;WkÖLÓ>¦«V±‰~ŒUIð”­0àa;q·ïOykæRþ^„0ôËü“j4¤¤f½(©ÓɔԘǟ›¤P±ÒkUe\à êEãxŸÀßx¾ ©À8ER ¡ˆUÒOæ¹hkš¼xoÅ<É/QîUêX³i?! 
B~.â-hT­ú,@“äFJ&vöM½ì’{¶P~NÁHQöoññ|U”RÆÒí@BMc)!OŠ–yEB€É 6Dœ4˜„Ò*zyФAD¼ž°7h,~N&k¶˜|ÈÓ@ö½p؆ÙÖ_ÈŒÛ8A4M6À’S‰Öo9@Ux²Éòaž¶Ä ¶¿ý/Ú»ßßÞ°v7÷LäkQ1‚8am$LaŠ‚ œ€p“ˆœZ¯x¨¨Ø>pÛF¬Ÿ@Q¤)p¿ö;CÍžex4èN¦õë×='Ú½2sX)W½zõ´GíÛ··rzXÎ)2–I ´`0}źÀ§)f]… ̘ ܱy&ptN°ìæxmˆ⃺ üÄp>Ȳ2)"ÿ0ŸãÅû SSŽ`ŠóÍ⇩{Gj-¤)RH—‚û`‚ÒçÇ.¶59’ÅmݸeZ˜d5’y*Í÷BEHšZrÓ Böçía (ir‘>œçÏŸï"ï/…F/ZÎê5ÙÏlÓ¦MÞRw4 f$\wZÔ6Ѹã™Äº@Ó„¼†[ð.¢£B„5FX*B„5´Kð…ƒi÷G¸˜Ö±4oÞ\#@xÆXZ ùEA¯ÙBz¨c08`ÅŠ´dÉ!Má~¨’ž÷lÚÎ=„ùÞßÈæ0©j^¾Í‹ätÇ ÒC¥¨÷yqgˆ3ΊW*yTâÑrv6.iɽhH¢5Š ±š"´#±jšÃÓˆåü©7>avã4 À‡ DdÏ^mã7ÞC¤ ³­Z6lHÐDꉣ">Ø·ß~K/¼ð‚Ê’íuƒ@¸ÐR~úé¶ÓòâÑ4yj”Òüéï­Ô¾YU˰æÛa7›šW¦ýsh冬˜˜œRå㶃´pÕm†zµOÖÞ!)§L4R^8•">S¬¨úÍžR4}š0‚îï¿ÿ6Ëš'ûcÝ<O¤ ïögœAíÚµÓBWÀô­ ˆ4F0‘ỂÉZ"|Ïоâ»À4:uyd+QÿþýµßØ­¯A‡ ÷ · å$Øm¸óô„4yj”Ò¼ïyôÐÕmiàÑu<ËAÇU¨Ryx¨Ä޼vw[3ÈÇNÎã3'¨d­Äjr[:ø4xó=âX'MÑôiª_¿>­[·Îíã³u=ž3Ê«ÒsR¬‹2w]}õÕZVAŽoeƒfßH9ö!fY8}µ¦ß,b¯¡Þq²Jà¼zVBš¼B6 éŽy¬}ŠpyñüÐðÔ…<ã=YÅqŰ| 0À‹[†%M'>Ma¹qÁóÇûm‰}êm„äþ‚€ P 4‘ˆ™‚ª‚Þ*ü9b4X`Tåx Ÿ"8Õž{î¹Úð ø€ìAÓ‡ü«Dγ8†‘oXÛø6ãÕ«WÓ%—\B;w<öß(ÿ¯¿þJ#FŒ [o½UkA Ñ@b¨:0Áoµ`¿"4¸£Ú‰±Sk¼CzÍG ñƒv dïÒBºè€8"]`½¢×ªUìiÚõeF9J£àYàˆ¶iŠöû q†€"^g ¡W¤ Ú 4ÈÑ’E‹Ñ•W^©5ê(#Ȉœq¡!jÛ¶-¸¼øâ‹Z£ Ì#)µqãÆˆ&4ô0GÂ×Ì.I!R± ±R ˆ,J@’ô¿A¢@¬”Éùá 4>üðÃZÞŒFv©´carÀÄ-¾ÀÔ AgM4M^ +i ‚€§@Ms…ׂÆÔ+3 ´3Ñ$MÐ=õÔSÔ½{wM›ˆ%æÙúá‡ä%mU$ÁAÑàÛ4ÐÐ@`áXil^„Ø kR½fÏì<'ûc…4yC "ׂ@\ Ò 59Q˜¥¼h5"Q³¼C£Ô¯_?ÓÆš%˜Š¢%ˆ ®|£"‘˜¾Üš#‘ÏX¿‡Û±\>|±v@Ìs±ü–HÞDÀ­–=aøAÓƒm#µ@³„ûÿøãÍD„Q`ÐD`ÉæuzµX”¯ Ö83 ¶‘×Ï>û¬Š0ÏE+xd‰Ìì€æ$š L„ß}÷AμÙ+Ú"÷Ø‚4yÕÑpŸ;w)@ÓÍŽŽÊ½&…„¬AÀpŒ5ò-€fdóæÍÚ(.Uä¿1ò Ìn -0QÁœ*{åÌ \T8ŽìS#¿pºþ(å«‚5Ž!=˜ @šŒeX»v­Ñ¡˜Ø‡2 ÿ ‰À+Òy<¿H Þ'Ìö¡Îy8[à Òä%>âÓä%º1œöÂ… µÞ·•,‚ŒÀ†‹Ê]oÞ@°5墂ܩ5*YTv Êìm4@ 8†•€ÚÆ•¤:k|xXCeŽmu­þ:E^‚•×鉋þ\9S¤N­á“‚Æ¿AØ@ìÔLPya­ö)⇴ÁÃçèïü¢‘W¢ÿ­ò‰}œ‡m\ߥKzî¹çÔe ¿V˜j3—C›ä• ±Æ»à…`tâ"Ų Ñi‰–¨° ‘ MøvÑ9‰UAÞPÏă(Òd˜×l¢ü•º#<`чš>ݾ0m®K´õ{„|ð'˜Ä–ôôzDe3}T¶>Q™Z¼ÝÀ¿F}½råJjÖ¬™Ö.eAu"ŒŽEr_ì¾¥‘D!B÷BÀºnݺÑÈ‘#µ;¢ñQ~ Fè( ˆ Tœ0k¨5zøF Ì ˆ~;BÅI¨Û¼ûùœô$,¡(,HÞѰ’&¥,¨ÀÑX{5‚ ß 84“±((4_QÏ s†‘t^ ê=tÒBIî«D¹¯éΪÆÛχ©áàÂÆ!­|•yiC”Ô¾€Ôæý.¦ÌŒ'Ò„÷xIþ¢Ãñ9}U§ã'Æ*‰c™úZ`–n”Bð}ªÓ‰³6MȧŸ©Z]7ÖÑ"b;qT-ÚΫRWѦÔMT>½¼Œ¸¯™,£Y4‚†òkîWQ³«-u¦Ô”ºPUªKUˆ#ó»ðjI줴Öòÿe´ŸÅÕÿñœD×rWÖì64sæL­ÇqÎ9çÄTcÖ¤IºãŽ;Lóh@:0m‡¥ɘô|¿üä³r' /+ Öì‹c]ŸÏ½`HÊý¼ 
'í}ôRÓ¢œ£ É_Zóÿè 91Oæ³B$k)ÑÆxßBn$·ñ\sø‹”¬Ü*r¯Ÿg©}¦*w3ÎL˜ðkj×®ñ aÜ òj…4åïäb,×ݸp;‡¢ûèwþc½45ç¿Gø¯×PäÁÌ™>Ð]gsšxhCâA`U°3ò˜îùb;}ôÅG4‰ÿx"ú›ÿ^)û ͼ… ev9ó1cHº“™d`µ†ÓYÔŸ†ðß üÇÃ9¨ u£c³›e³Ê‰•ÊeètÚöíz¾Y êz©ÀÐÚ)—WÏŠ!‰$O<ñ„fúAD`%c™È»g:—×O Ã^TŒ}úôÑ*G3ÙËh9Í.vxýUì7~°%ñ_.ÿɱtqPÒôàƒÒûï¿’0Áh¡¦…Aí^ø³óL@p¡ñ#€÷š&#Éã‘êÙ§É×7ÖE»Cn¡’Ô›ç¬dbůUÎ>^›¤ @Jn$™ÿ'±â"š¼ ¢œc•4…l$¸r´›?ÃýL®Ï§¬Åþß¹‡{øð.àX¹›ñ{ R©° Mè0:“\º‡î Íü‡Fõϧü׌ÿÖðÇZw–¬î*hš‚‘Ý©QßDçWÿÝ„ÊÐ Z@íø$sÿ±w(“ÍztëÁ[é’e—Py‚:¯HRYƒÇýeC ¥‘› µ 0¹—3¼žƒð~c“?¾/;Q†2Ø)¤É¯wÝ|óÍÚ+iZ:œhÑCö1±Z øØq–Ç÷m,ÐÙgŸ­ °z/£o´ÃgÑ@šÆ—ñ:qáXÌL^áH?œi€4Y5c-¦èVþûÿºj]O¢Ku¢'ø/0…Ê'4r¡ëIcÂD´Šïǽ¶[ žê¸´Dcp„Ê“&…D×èQ=:¨9Le'_ÓæôçO/4N]xéP°Îä5¨tÔÐú|ÍËl¶ðF¡øèQÞ~¨ðwàFëÖ­iÙ²e»£òû¼óΣóÏ?_ëÕá´#¹ä .Ø5kaMHF§N”Σ»ç¥1ªVI´Lz{}`z‰ös¥pØOÉúÓ™´Ösò ¦+‚^ŽJR9ù›ª‹t),çoàNêÉH¹ ¢pÁ{÷üKxAiܸ±VÙ…+ÕÃ[ü#"œ¤ÿ*øPüòË/t 'ØNbÿŠ´øQ&‰¬Je¿°'ŸB'ïØNiìƒãF iÒ¼s“Vi¸š&øŸY˜j_£—i"}ÎF€Ô„ýL¦Ò¦¬IÜ´]4 4Ü05¤³Ç\0ùõ ÏÒlThì´bÇ`–›=»¸É¹Ø Qþ¿F;±’ÖÓbFøY&¥K4¢”Éü ¡m©ÆÈæ:äDÃéGK%Cã‹ʪ`>ÂÅ‹kƒ%à‹e•0!}Œ”ųv"[i+—ÒïZp]H™ü×AëX:IÍøx=‡Î†UóÜOô;f¯ã¯f³¦qúƒþ`7íElçÉn†ìhhS -ijt" ¿CX»LÐnbÔk‡PDGâZÓ”³~#åíÚMiíÛD=‡w…?H¸|0 –]’è5îhñÀÛƒnds¼ 9¸H±RL£1<\£q±}Á~àþvo;±V÷÷ß×&~ã7èõ×_–|±c0'9õišÉÚË×éyúˆÿàÏ4Šÿ¾â¿p HS4}šVnÈ¢ŠåX‹s$—êÕ(ËZoŸiñ ±³ªíÙÀûrþIº€ÿÞ×´s¢†ÿöòŸ]Á3t†Ó¶@-¼-Wš&»ù çùqMš²ÿ]L9‹–ÆiB0@;=¸`|Ä¢?gðXƒÁÎ ~ ùÁH(;ò+²Ø¢×øf…Ãøvª{Q¥áìy‡!ÇtTºvœ¯ws8‡?Ï8ƒŽš<‰jrðÅmS§ÒïýOÔrá–0!‘À˜O.ŠW*.…yΊd:¥Ø`r€uÊ¿Ξsiž ‰z®ðw©\¹ºá¹éEn"ïb#ÆǃíD˱š`ž³CZÎdgh#Éáq¯¿Óÿè"zØè°á>Ü_ù”ž°óÒK/åÎÛkZX•W_}5àhðŸ·â”4-¥¥Zcÿ½ÄÆÙãxûz¦PÏ¿¡Í£^‘¦gm¡*Ó¨e£ŠT.ݼ9¾òÑ?©{›j”^&‰¹¦]PÒ§&ò+ÖððÐ ^ŠÞøÁ±Þ®8õý\L_r×êÔ·Ã7ûÏ?ÿ„<ÏËÌŸ’—w SÚI•(o¯õže˜në:˜³É­°ûMÔ•5L\¿98œâe‡Ó'LV$ó ÿ¥Æ7Y9ÛÞ9 qÐdÌŸ?ß²vÁÝwS ¨J÷î<¢h)-~øaêôî;´'ʘ½’ÇÏÙ MVü]ŽawûƒZ²¨l²  =udƒ›¹öH¬×ü.ÝN3è z‹ÃZÔfóÌWô 뱪©KC®AÈ@ Ða@ybMB9‚æ×Œ0á¼É¬I¨Ï½x„±*¸?â4Y•¾¬Í8q¢VÅŽ9Ò‡fš-'ÂaVé6:*©Îæ¹}ü—ËæÈdÍTq¾5*ÌIÊ9¹yôÀ›óéÜã3©J¥4jT×¼9þù¿Ö]`ž 6H£d^Ùd €XJ‘’é4®¢ÐD¤)’I•¿ˆZõ})ÌŽsÙ>B¶ô@ÆOqVä£2²£v7ÊÌr Ve)ßwç¯ ¿µÕd σ¶iÆ V¡Y”í?N£¿þJ+Ù$0µe+jÅÀ@ãtçI ?0W‚`ÛÕP"'Ó‡¬èg)SÁzÍe9&Øößa‚àx`áÌRºê$t`bUÛd—4©2®w° Z¦sèîÀCAÃÙÖn=…F b§S­ˆÓ‘sf€óþ — ÁÌNûæÐŒùÛiÆ¿;hêÌ-´nË·NIN¢©oö£;.mÅ„Éa4º òêÖiš¦œdS0Xuƒ=ÙÇvó% B^† ºÑž´7®ISRÅ ”·'´¦©Üà 
ÈWÞ™“aȧèà8<ÚQ{ÝbèP¢öíù5ã÷,¤ áìŒ$ê·”½&øí™ÑŸh&kU×â ÙF9u¶¯cÇŽ¶œtOÚ¸±ƒà!ž™½ÿòeTãøã)9ÌÁùÂ!âÓTE8ú:!+i›ÔÞ)™ ÁhÌ€AôáH+¼ŠgE,ð‹(ÜrZM'e™pN5zÎê-¾¡7˜¢f½žñPn³tàOf‡4AóM >œ6ñ·hE@šp¯pJ'êÄ#5Y&y6ŒõÆýôÑ÷khÎâ]´qûAJI6÷9Òg©l»Cµñ6H“[§õêÉA.í Ë„9˜êÖæ ’ÓÙuï¿øœALœB+þ,e ó½ÁM%ir¢išÈ>?0‚ÇÔ…êÍÂ,bõ[á ý¦JæÖxFãXmÜS(Ú{ðpèç¬ßz€ ™%˜:ÉŽO“Qz›9fÛr&}8¨¬]A=Œ(¦‡‘ˆèìtgùu×]GÆ ³4’ ÷pn 0êw7Ž4=—ÿÂ%j§Yzm›fЫwu¥ëÏkFƒOiDuª—5;Õóý0ÏYýfÌ2S“C‚îä¿HÈj~NùÏŠ lx/­¸XIÏÉ9ñMš0¹ª…(;reÙ÷É}Ãé`£kÐkq¢i‚õ¬r;_7æk¬Õqãǘ¯PÁ Ï×ÿ®ÐЉÓsþÌ?2_ ‡Æ ¦ T¤vˆœ>Ojjî ? ÚNHS:ø„}‘&ÑwìTž l‹M3‡JÒLÓxFß(góÀcf¿až[Áa*ÜÊž¬#týSѰfÓè¯ÌÍÜOZH× ŸEÓçnyKh7A­LÎßsŒ¶FÜ }NÏð`î³8JÓ1ì.Þ6±¶/”àþÀßêr8èöìÙSKö /ÔÔ“O†vö‡?S¸5Mˆp½ÿ¸Kª˜–Žƒ4¹%"–n†“ì<3³ÛUf3÷Ì}dS`ž³Ïaj±ÁÞª@Û®ŽŽÕ{êÏ3÷<ÓŸ«ÛÐ4Y MIÜçìÞ3¥irR®\I4öCâàqá/  ,XàΠóX¢.'Úø Qæ%îóˆ<4aíT*± ¤©J¢>Û·Õ;Íu|\çi{§EåªÄþHÐ8}ÆqÝŸäqtïópq#qæKxØrà^óߘ;ïwB› _üp mß}„¾¦-Õ¬š^,ÑŒ iôê]hý¶T&ÕÜäòðµí(9Èpñb‰ò*UªhÎØ¡bíâ‘N[8¢òf&Ck™,l`"ŠßØÙ¾à~<]ÁMa-vŽNÑ–À{ý†yu•Ÿ#t¼ôŽã÷Þ{¯k æÏ`qéÜš’ŒòÂÆHø5ýÍCTàF£ó¬î ¥i²šN$ÎÃ7cµ£a–Ÿt~Sök±óøëº˜çìųËâ([¹Ýœ—=¼üÍAYh„­+›ßn«áNŒ®w»Ï:nïäÅõ MüâMðR;©(ë10Á4…›[Ažì¨âÍîWë,ž‹ýÂ!púsÚ(«ûWæàUÛþYýt¼F/ÓJÃáøqx!F8Úñ/ÀŒæOÒãÅüŽvrõ˜Äy±±¬úHø=š¸N°!Íš5Ób”…"f_þ¼ lHOßÜ¡aR·«T!•Ú4Π¦™æþ9vÒ…‰ÎŠ_Ñ•¼q˜àô݆G,^ÉCî[óúj†?˜uLÇÐùlpiÈ`é– î²`ÕDˆë“'OÆešàÙa¨~¨Ùpž’þL'ó_8äÑjì£pÜÏMÀÒIûxÏ*ì—eÓDgOÓ”Ëé¥ÝvOÞòÉ:§o´øm]©o`v ;Õp&àr#¾5MÍs±Š!rå4–…ѳ†#øœ9FGìísÒ“Âä e±_•®Ó½ø?Dõ¯²wo³³ARœ˜1õéU?öXZþüó´—ý£ ur* M –"EXÕ¤®€sézŽ>ŒÉUoáp³yÚ“[èzžýj‘:ÅÕbÊ»uÿºuë‚νøÓ·›lXÎÇE+Ã÷ßeý¢~+YÉq°¶ñdµÇp7gt«ZqDð3f +ÿ}ZpZÔs Qß~ûmÐ,„³>Ôßh ÏA÷ÿÝÅÉÂ[è¯ ÜFýˆ(ç‰$-9N8üÂŽ!î­[3MSRªÝ6Ö3%z\)·‡µJøÃt? 
ùï"md§5¿¦p‡¶X¬b§¹&Myܘ$±¹I ~çmÝA) 3Õ.ïÖ BÐ6…«p¨cBÝ' ÇV 3`Š(ýL!I°q£t•lÁdª·˜ÎÖïx6{)ÚòQÿz wÒ¹Þnvwà™Î~ƒ¤„£w×pÈšÖ¡#²g7¥:ts3̓³ÒÇþU v}ΞÖ4 M˜(ÍfÏšžúrWÍ­L ‹‹AŒ¦}ü05j’4Y¹¿çÀäe…4é ò1‘5Li(VÜ©¡Ñq±JšpßÑ£GÓàÁƒiìØ±ÚÐð¥3-”ÉŽ¦)‰ÛÕä¾þj£wµQêà2¼äcYÎË.þÍR†u'ðßhþ»Žnòïtø¤)^|š€§™_‘ëç4ž­!-i¦òz^f›.Óë_þ³Kš ]„tùÀGßõ»Éå‰röÕ§j“†P¹U(yK%m©Iï~;Xñ,ª\¾S±'“ÄI$³Â¶¬‡+I±›ÙüáŠ4å±­{sË^TcÒ§”Ú¦%å³­hû©SÆ3’¦Ü›iÿûã({ñ2ªòƳ”TÑ —°“¤%Ò®û…){*ÌÐ7E0ot‚Æ'âCh.á¹jérö/Bh+‚z|fR‰j°oÀ@-Úîþ uÎ'šZŸÍqË88·*ÇŸ>Ó:ØqÔ‘ iªsÖYÔáÍ´’£·|à³"ÝÒ¤*ƒ '&ÐA&Ø)æ¯{2UbR‘´¯ »%6å…§Œóñ»ä«Éý›ÞE â ö^É#X“µ‘]Ú8ƒ/_¾œ0}O¬ ¦8‚ÌŽÌd“ t ˜Žºl]6ƒqä•«¹Ú,Pȅ⣠VÁU¡q²3o$y«~0IgóûÁ‹©àÝ!XÅëÍD·î¼•N{ù4êR¦ææäÜ4=Ý`Í·Ó œº¤<ÝÄ÷bŠ'ë7’Nñß>Yµ ûù}`eoþj^¸£¿3ñlðoúfÅ7”—Òlÿ5\“¯NÁ¶Á ¦\h'!x×ô’Êßzï‹zîB'ƒîûä>›ã'W…ƒl@ùI£%®HS¿D︑ Ñr4í9†’jÕ¤2Ý;kåÁÜp[HßK¹ÛvÐΫ‡Qõq#‹•5wÇN‚£¶/ÍÏN‹ ñ,/‹Ÿx(FªÀ<—·kW9”\½Z¨«<;n¦Â,º¡µ¦¯X»ó×_Ä•>Q߾ģWˆ‡û²#63t;Ê5³ž‰ÊSsÖ <£ G8Æ3|'vgn¡¥«ÑÎýÛ¨Fó J1p€…E îÖ¤Fì‚Úˆ¯¶V6Ü•)#Iâ `¹ì!cE[e&‰Á±RHSqp0² þpÊ„“_žVç.þ¥憼¾?JêS®<5²Ä$‰_Ž ¬×xa5Ž=—§¶+ðkš…ðú1(pz‹^E¤_ý3¨GîÔ¸«ªôô±YšáoÂÏ$Ãb"ºÓì†Ð]jy3T‡ÍrB8Õ¿oIxçX0 ì½þïÑ´iÓ\“&`¤É_rÿùÛôuó/ú«úîC'¦ŸHϬxFE©?f¶z2”fQ] ÍÝÏìoj7?Î7쬩„=^»"MÈ[rƒz”·}eÿ»ˆÿ6ƒª¾ó²–ehö>úU~õ)JëÔŽöðvïX¢©-]úQJó&TóÇ …G²—¯¢äzµ‹™ý ê6|l¾É·@š²W¬¦#̢ÿÎÐÈSÕÿ¾¨K%7ð·ÿÓœ™ã÷Œž~šØ,B<£8Ñ)Ük¸æÿd½P¼9«#d~•¥:ÔL[Z7dõ’G‚ ÃgŸWyù‘ÚÂk ŽÝÇ‹žB†Wæ¥6/é¼ìæe%/Ü™Òûô‡cL\¼’‡)¢wß®]»Âi\P ‚Hq”¶©ijÄ ô³ÅÏsúKõ𓹇ܿ)Pe®Ü¹aD¯µ|36Á0é‚Ö$'‹¨ÃŽôÙ½ ©Ë=I”wÈGœJì:´‡J/œÇ¡iúþûï “¬Æ¹¦·ø;i5|T‹ë‚J­¨kxÝpD·k‚µ{pkÞïß‚'æÄâVðý»õ±t›+×ÛéhK¦Ù“O>™ã^O§žz*sÎ9ÁN׎ëè^ Ô¿ÿþ¸;äoÔ9v „LÐæ .šYÿ|eÒ´À‘û^ù/U¸õ:BL$HΪµ…Ú­'žKUG½F©­šû/*ø¿ýœË¨ì¹§S¥n/¶ÇC¨Æ7ãØ éO ó!ÜK/¾Šå),ÂDö½ñ.š4™]¥0¹«ôŸa¬j¬J¾›«ÉežïÆ‹…^º¹œË‡60@eù/³yYÀ„`¿ÉI'ù Ïi¨Mòñ\çD§óÒƒn]xÆ©P‚s˜×•,Ln !Pû®1ê Ì’%K´<áãUÁ%Qiázäk,H3—ÓJá4±4qþ^oâßó Ò1T×à+X=‘Õqèéâyàz1 jS¦LÑL6 ¤‰|ˆGCÉA4ÔÐ AƒªUÄüé7ÿOð;Te£ÔÔ¾ãç¸]Ó~á¹®æÞÅq!bÀ€4é‡Ê¿KdbZ³˜4ÕÙ?·ú ~ÒäU®ð=D" U¬Wå´’.$Þ“Æ[9=jç Ž W]5çó|žÒtèÀlÜiÛ¶­íTñPGªÍ Ì`°–;ð\ãßLŽÌ™¯‘´nh×5Ã({Ñ2Í\whò4J®[‹jýú5%ש]âz›÷rÙ„§7•åsÔÆ$6%שUx>´XÛN¾jÿûa’^%>nè@€à[Sa ämÞâ7 ®YG0êÓ <7’¿Ñ qGEmca纬`Á˜%~ ¯WÒ1Gïáûô‚öS%{€ž^D„ÉÀˆ}jkõ[¿­öáZìÇeTÄ/9ˆ œ»aæ1Á‚;p L 
âÅPelÿqιC:øx–¿Azp\Ÿ¶Ú‡µº®Ã‚ßÀ2Wö›‰Ê»ÙñDÝï&(ªÌB™n@ŽO?ýtúᇨW¯^ñň34öx¶8JT,¤PçEã8ðf¯ߘ“¦v°)Ã}b]bù=Ñc‡º<\~W¨'ïÃÔ6ļM³‘HˆSÑYÃwѨQ£g†ÿ°{Ò”“K‡§üDÕ>}Okà*¿ü$íw,ånÙ¦å¶â=·sþÎ^´”R[ûU¥5¾úˆ65íJëw Úó~¥¤*”¿ÿùÊ5^8„‰x*”ÍízSí¥3Û%7ȤÜ5ë)© M=DûöA&M±&ÊÉÙºv¦‰ÖKQiИƒœ`!±YY[yY©ÅVñÁç<µ „ßø°°VÄèЋš:uªöÁ!_XðñÁR¿`ŽÁŸE‡5ÎÝ_‘ET°Ùy±#˜ŠáÊ+¯ÔC;×yq.H¡Ýü{‘XKšPqxÜä0È™€L?üðÃôé§Ÿj§à´[iÃLÐ# ÁOÍòèf?¾+”ßm¸B;ùÁw‹ºÂKAÝ/¤)Z7·X£NÇs‹†à¾xW½¼/ˆá—¤éàד5|R[ù‰œ³+²™r´¹í1Tö¬S4³]îæ­”Ò¸e</Q¶½³ówµsiKÏ´sÈÍTý‹óÄå,^Fyû²èÈÌٴ󊛨ê˜7)½ï1´uÀy´ý¬Ë¨ÂÐ+¨ÜÙ§ji$תA¹ë61mgÀ¿|&v+Ò€$ÂþSUF_±'7È  4*¨@ •(-|zòƒŠ çá˜"'¨TAZ° !Pk4$è%Õ¯_¿ð8Ž)¢ƒ5F#`¢Í§á0#-•þãßùŒå÷Z*ʸÚgù4Ÿ–tÍ‘IJ˜³—G÷màe3ÙÉË.`óSæ%|mG7-y`^LOL°ðû ‡Ç $ʧ)XjêŽ`ç;2üÏY«ó!üŠ¢AšP·¨úÈ+|PÏŃyZ?+F½ÂÉjºèà¡~† #‰xV¸O´LêöºþOd¤üåR¥‡ï¢¤ÊEf3uZùÁPú)ýÙQ|'ås£ŸÄjs&%pÏeZÞ&6£}ø¶¶Z¦Ê/>N[;5O¬fÿý;J©_O;VsòÿhÿGŸ#H)ç•ãV1ˆ¤uî@ûÇŒrFä¡DC ›1†:Üû.^v!ˆžªLPyÁ|†ã`ÙJ›®œƒ8©û¥™÷ïe‡[¶RÒ^Ãz(ÙÌEØ•-/ Ù È â§hŸ”›øò>¿-¬€Þd¶ûo¢5ï¹z/}ºh;9ÝGiìžÊ¯!†ºÂ1ø¤#ì0ŸŸWt^àmkŸžDiBša±üï±—=ËPš&ËãAšÐ«Fh„H :[VâD¹ÉL°ÊDî&¯¯E½»C–c\¢©iB}Ò†çLCìB«ñËÜÞÇèzW¤ >EeŽéi”ná¾äjU ‹‘ì}ò%öiÚ@Õ8 A* SRöÔ„ÅHÊ‚“t‘@[¥÷‰*:R´¥×níîH´E}6} Õ£ÏÂäã°?ŸÚpq½‹kïó14v2•¤NLhl’&9+’{(Ÿr}f>,ÀJ*æçÀj5/æ©”¾#¨¡õJÐI@‡!‘EPìÒ¥KÄahÞ¼9µlÙÒÓû¢qõ² W桽„Åm~ÑÙŒ†VRå{Ô ÀË+Á=âRÓä*/ w›DÜ^ JØ|vo¢¼µó(oÃbÊß¾†#ãna Ï6ŠÍªŸñ¥–%_F”2ðòÕm¥v;Z#⮊º»“ç&ËÓ­bÆú=Ìšr@½èGé^„Âê‡wˆGþÀaþäXÈÚÉÓ>žºt6ïYOU98‚[Aï)Ño# Aj ðJî¾ûn¯’Ž›tv Zqiðl‡ æ)Vð²ú{š‘‰#ñ`žƒ¦ ÚÉh‰²Žxy<‹hMÚëJÓä%(¥=m&ôl «rò·,§¼•QÞŠ™”·z“$¿Ó}°´4ª¤xDcIÒ䤷·ƒ£#u§ŸÚQû`Ù±t FB…Ìäþ2]ÎÁëŽæÍÆòØÁÚlIÌeòÖ~¦élº3T!ƒ4éM…!/H0zqܸq RÚ辈ˆœ\Z|<&ÔE^jUÃõ|,X Mc3`Àm°N¸Òµš̘¨»½|¦Ð4­Y³Æj–Âzž¦°Âi=1øÁäcI˜(å-N¹³>§¼e3Ø? 
žÌáTvIÁ)<‹øM4Œ‰Šß±9ú•iJ5žÎÂ-i‚Ÿ ´VÔö˜áýz‹Ð5… ÌãÈЋh:ÝG ÷¹Ùˆ¶ÊÛMÞåÚøFsmE3ì€×èÅÚ ³òÂ<¤ y&ÎÒÑh¹¦dß+[@$4ZFùÒd„JŒíË_=›Ž¼ã‘,kïü³“ÎoAeüŽäÁÎ <†Þž];ø<žvuM+LªOlÒ:òŒÕƒ ÷9Ý@ÀD8ûY‘ÿÐÿ¨ë“”|FÏЦKÑ·T‘sá8ýH¾p¤)iVÀwàuTn+ùHôs@š qŽUY´h‘VgþøãQ->gÎë ‡`:±Œ8¼U‰Ë„4•€$wó®.Èî‹3·Ñý¿l£óè:¤ •4;vä%z.¦óØoh ̳‰Ã—èâ!naô¬ákEô„i=ªù5¦M<…ŠÑ?+)–<ø tƒˆ iÐs‡ŸW#’ÐcGœ7,ªÜO¥ÕÇ6Ža1ý¨_£ãhðô×â[_½z5¡±…»ÞÉ1Üš}„–Àý¢ö«cXë5úû4ÁDZNáШ¨ü¯X±‚nº‰CäDIÃsë­‘ËŠ‘–0)b¡°FYñl•>‡Àß8û ¸Û /l+üVûÔˆnøß½ùæ›ôÓO?iÖ ´+èp?µ ƒQ,8+êWl[‰µìýÓ2ìá?!M‚*i¼láýGòhì‚=tk™f8i„ §vd(ÝÈWT¤&TW»,‹çÊã)ví$az.4MVI“Jä eÑ8zŒÎ§û9vúl&¬Ç3äuS‡]­ažÒä ˆ]Œ šAhO±€ðªßØÖ7"¨¬Q£²Få‚b·óàuÁ iú‹gå2dˆ6 m”Øè¡aW¤®h0ñc[-8ŽÆçªPߪÆçè4zr¡e à_`>ôǰ­'"øF ž $ØõNŽá~(7Èž*·ÂkEÈ þ~HS;uîÜYË#Þ„ŠÁ¤ÊѺuë¨ÎO9y²?f"0D˜<[}þõÛF8>œƒ}z­ÈŠ3Ço¼xûõë§ÝûA2± ?x¾jÁóÀ6ÖúE}§x¿ôd u/0Ç7‹Ð«™h?ôÐCÈfDEHSDá.º^¼á)«³¨G²´ëqÏÏÊ=ð²Ã¹Î®œLþ £¸î úŒõNØMÂð|4vI\Y¦pOÐO쾂–ÒL¦MŸk#ç†Ó†÷°³•…Y#a'D?ï*QT˜h°TÀV|XTã¼Ueó±k4ØÉW× -¤‹5ÎA£¯z®øÆPù*r„mEšÐ ÀGé©ë'MšD]t92¦ ø6a2iäu·nÝ´eƒ6zø­ˆŽÒ®à·ÚÆ=}œM!|õÕWiÔ¨QÑ̆¥{÷íÛ—>ÿœë•(i“,e’O²2€ÆjZÑ8O}çø>¹Â¶ª Ps2â¼HjÒ7‚ï‰!\¤iÜ¢=tWÏêtç› Z§òi~•ª¢¡ñr¢IYÀ#审ë5gð³é4¦-´°vîmtîÚµk©wožÆÝ¦´¥>„2˜)ÔÝ<¢n-OvÜ€÷ºôҢݸ¸É¿×ÂÏæþûï'ÄòÁû‘Á»Ê ¤kô21šû•š =Þ¼s  3 íhÈAz€·¾G#…kÕõê\ë°ibÁõNezÿý÷^îÙu'žx" 4ˆ¢«É³B$ ò÷òË/{}›°¤í7Þ‘X'Ma)lQ¦;³Žü¼yó¨k×®'L€DHS”^ ôn”ÀìN[“EåS“¨kí²T³\ m?ÈQÚ¹÷hWР¡á²+sÙ¼'E¨1ÇIú….¥ ©ûuui÷£/v€Ãæ1sï .š¦’o´˜$œ €"Bâ„ߨø0 ÙÉûUò®Þî6çŽ;îðö&RÇLóá¨/Ü:"—Ä Á».#òТx“Ûo¿=jwÒ%èÑ3w«i:GÏý¹ƒ†ëwx®Z6™¶Èe¿&û…‚&ÀI£¶‡CZ‚0AªñßMt+é>qMš zuKšÆ³SÕ KpKh™DÓTü½Â´?XD¼GÀêHRïs’Øwù‡°‰ý”¼-½};Ž·ùI˜Ôjvôš·‹zÕ-K]X˦iס¢ÑúsCmƒ49éíÕbR‚`–JØ­ŽðŸ/ L:Níò{häøQ[i5›ç>q“•Âk¥‡Y…l ‹ü•O\‚àMS^¦p8¯ýoÉ^úâœZ çðˆ!Ö<}·b-Úq˜–yÒZž¨öòö•©“©PŸ8ÃÚ•~j =µ ÖԆݯ—sø‘ìŠý»ÝdŠŸ8ºÚÕììfš4‘^ä‘s3Ø“é\žÐåFÍk¼¼£Ž:Ê‹d%MA@ˆ#®¾úê8Ê­dÕ „4yjˆ4¡I ‡¦é않ݩy•4zöÏíôŸ^ö5=Íš5+–V¸~h!`0x ÞsjCým4l¼þeì·uReïä ˆ²wÞÆfÈ=ù”½—(ß`*¿¤2D©Ì¿Òªù(•óQ†¡)à £ôLÞÏð%ë Â9Æ Z =NSq™àÀ!ðjh;"îâØUW].˜$A@8@@HSÌsáÐ4f½mtš³åf®ƒæ)œ’:†(o*“ýÅSõ!®[¬ñÒöI÷ñoèó5ძlÆÌ„/HJ ¤Va¢Ã ¤`åÿa# 9œÌ12oÑšuZt_L}€$ ~fð¥ÊÌ̤Æ7Ešàk…àj O'Ÿ|rÁ½e%‚€ (iŠÂ“örƲëšÓžl&µš‘¯;‰§”¡¼U³‰Žø¼}É©¦%!€i š0Ä"Q 40Úv“½ZClc‚Ò!NnsQ’*¤uø·¢yžàh®‚‚œà8‚ a[¹S,]º”Ƨ% ­®Q œ1A~”à8œëU´X¬U@E”gçÎÚoä~[ F˜ö 
iÓ¦Ô£Gºà‚ ¨Q£FRiÊZA@D@HS "øm¦Iqzk_ù*”Ô´;%5ìDMv$_½6”ëKÖÈLI»›îÖ´+аlþâOÚüæD-ª­š:B‘ Ï!?4aX°1œ –ûÀH„²LaÁ|DjkôÔÿVÛȇšËdhÛ¶mZÞW­ZU H‘Jçê‡ý*â¥"BÃô‰üB[„|ªüb“‘é¯ðF²!‚€ ˜ ¤É/wCó†ÞŠ€Ðìd³Ðn>Ë,¾ÚS¾íõÕ ½GÊÓ¾Ý>Ú÷Ó^Ö}ËÓX|ªi  I@P<,ù…)0 ætÃ647 IŠlXÉO$ÎA8p ]yå•¶'íDþä‚€ ‰‰€ME<8]$’À$SÑù矯ÍÕ_ø9Á*˜•°ÚœªU3˜øøXc’NËS•ª­©BÅj…š hS@~ ]Á¢´D‘,S¸ï°€éNDA@ˆ„4Eá)Œ?^ $ r-滂¶æ1pX& ˜Âàë#"‚€ ÑG@HSôŸä@A@â’ãºã Ó’EA@A@"€¦H#.÷A@¸D@HS\>6É´ ‚€ D!M‘F\î'‚€ q‰€¦¸|l’iA@A@ˆ4Bš"¸ÜOA@â!MqùØ$Ó‚€ ‚€ i„4Eq¹Ÿ ‚€ Ä%Bšâò±I¦A@A ÒiŠ4âr?A@A@ˆK„4Ååc“L ‚€ ‚@¤ÒiÄå~‚€ ‚€ —iŠËÇ&™A@H# ¤)ÒˆËýA@A .Ò—M2-‚€ ‘F@HS¤—û ‚€ ‚@\" ¤).›dZA@"€¦H#.÷A@¸D@HS\>6É´ ‚€ D!M‘F\î'‚€ q‰€¦¸|l’iA@A@ˆ4Bš"¸ÜOA@â!MqùØ$Ó‚€ ‚€ i„4Eq¹Ÿ !pèp6eí?äQê’¬ øòYA@øFUù€K_¢µwR—v èä¾íé¢3ºSZjJ|Lr/ÄBšbèaHVA@pƒÀÔé‹('7¾ùqMùu!Õ¯S…^{lµlRÛM² y-Èç‚¥hÉŠÍ´bí6:x0›7¨NW]Лj×ÌHHL¤ÐDBšä-A "°zývºsø§´lõýÂêܶA),eøŠMÝì×Ò×SçÒ¤_ÐöY…‰§¦&S™´6}¦ºµ*Ó÷ £ô2©…Çe#qÒ”8ÏZJ*¥…Ë6ÒðW¿¦Ñ/)4ÉÉΡkÿó-Z¾‰>1”Ô«Í[÷Ðê ;¨s›úT&HÁœkéž§þG«×ïÐðéÚ¾!Õ¥)µoYš7®ÅD)ƒ’’’è…‘“éí¦·ŸLýŽnK9X:cwé|®R*Í]žäIDATA@H0vîÞOÍ_C —n¤NZ%ø3½öè ºàÆ·èê{Þ§O˜8U®TΙ;†B³æ­¦òeÓèæ+O !lŠ*í’“KWÞ9ŠË\†î½á:鸶š6ɨÜÑvgT*ktXö%Bšà!KA ô#³dÖ݅¤ ¿+VH§‘O_NçA7=ôzþJJMIÆ¡rÉÙ½èŒ;Q× )³v•ÇKã`ñà-§Q·ö¨a¦¹&¤ôßý­áÒ±uýÒ…”ÉrÀHrŠ ±Ž@ÅòéZhCôùÍd‡ðÃ/%˜¡žñþP±íSúµ§ OïNÍÖL(ŸsOî”0¤×FO%`{ýྔœ,Mg±'~È“O ‡-EÄE~:·_=€Æ|1ƒæ/^Ÿ¸@8(ù'_Ï¢'üI-Ø¿éœ]¤ —”„4•–')å„F Ÿü!÷‚…Þ»üÜ£¨Y£ôtmSBƒhPø¿ØÇë‘—¾¤êU+ЛO^*Z&Œi—¦DzÚRVA@(µ¨0ÅjmTÐöß¹ñ²ã5g﹋Ö"ûtlÙ¾—n}dœ¶çuŽwU¿NUÝQÙLD„4%âS—2 ‚@©C@i˜”ÆÉ¬€'ÛVÓš|5e®Ù)²Ÿ8ÌSÒÜøÀ‡´mç>ºï¦S9ÊzCÁE =Ç/ÁèO§ÓŸÿ¬Ò‚®µo•Igò¨¨EA ñP#âÉ Zx81÷êÜDÓ6=1FB#†8SukV¦*å´Ñ€I>eçäiu_Nn.U¯R*8‡+Û¿ü%Íc߯óNéJ—ò¨BAiŠÓ÷`Ùª-ôŤ9Ô猪´kïúhâŸôâ;“iè%ÇÑÐKûš)ŽÓâ:ÎöL&•ë7OêL>®hEÒŠ@9Ž­ÙàpÈ""Èå¯3—=oú_Ë Á1ûU2ˆãçßϦÕë¶Óí× š†ÛƒæÅíïÑ~ƒ*íÊ3éɻϡþ½Û¨]®×9“>ÿn6uàÎè#ÃÎpž$PzҧϲIƒôî³Wj– šÿþç9"ð7„ÊîõÇ.)<§EtmôP´Áëêq ›žÜ»J+eÓӴަú%yÿƒ'üíaxÊžndOÖAÃcjç’•›é¶ÇÆk¸^ºŠ2*ÚYµryúû›i÷Þƒ´yÛÚ±+‹Iáò%ù¨VµŠT‡¿ëš¼g‡H‹¬þÚךV ~Li<}Šˆ  Ÿ&…Dœ­÷í?DO¿ùma®Qi`Vó #o¤\® Ýò_B/-Q“m½oŒF˜€ÁÖû )w!P.=•²,hš3¡iÊ/3A' 1Ð93Ô-0â¼yj‘§G|KyyE$ Ôè°€è,æ o_ÿG£d,íÃô% Omš×¥>=ZÐÀ¾í~Yˆz^«z¥°¦î`ýçéÏ(;;—^|ðB™˜×ÒJ¬“„BÇéó† þçKKä¾÷ºFñ䜃‡½£Í95îõk #fI±ç 
÷eÓB.]~ÞÑôþÿ~O¤âKYŠʲVæ@P:BÎY©±4;qÃæÝ´kÏž´v5¨[rÄ,ÝXðm½Ås±AZ5­Cg±÷Ç|w{X; Ó¨O~£ñ_Í¢Agö¤Æõ«›Ý2èþ¿yz˜O¿ù‹Öð¼x#ì·µ'ÔŽRR’´¹ö^yø"×Úä±à $ïÂÓºÓ1ݚ͓LL„4Åés?ÈÄ`Ͼƒ´|õVŽ»R³X)*”+Coñ„’§^ñ ½9ö'ºùŠŠü^áŠ5ÛxtHƒ¸Œˆ½7<0–V®Ý¦Íì¾rÝ6­¸e¹."”vªU)OÛChUÇ}9‹ÊÖ4Ófx`êÈÖíÆZ˜öæ,XGßü8_s*G]ôâ;SNè£>ýM›øöùû/ Ô¬Y½"ý>{Oáò!ý:GÛØ/qÁÒ Lšvjù‚3{-N·iÆÚ^ånž©“–¾1üêÑ©1ýþ÷r­(ÐÀ‰¥ø mã:á±W¾ÒF}Á¤¥t²^åé@zwoN &õÕWÛHDdãÖÝjW±u2›ÌÖ± ¼•7¿„IÒvºå‘éÁ&ð„Àeéí§Óq=[j× ¶Ñs÷Ï™5“Ý›O ¶ío‰ùð°x)s®cÍZ >§—餯p}€È4*^>‰ØN[HSl?ÓÜ)gÏô2æ#ÆF}2ý ¦ÑS÷œcšÖ5K –mÔ¦0=)`DÏä_г÷žW8âgó¶½ZÎ1\YD(í@Ëüï’ 4ŽGÝ0¸_±âÂ$wÇðOØ”•LÜzD†ÚOúy ¹ w±tðã–!'hœSžï‰'SÆÞNë6íâÐT¦LqÍî Ç´¦—¾x~x鋸M‹…b–V<ìÎ#^®8ÿhͯBåþ5[í—µ P€OÓ~ça>ƒó5´ÇÎ8qò?ôã:àm6Ýù)âqÙ9GiëÀýøÝ²Immуïd0Ÿ% T9ªKS-¶\Ç6õõ—ÆÄv÷ŽØñ½:ýðÛ"m ÌÛ´lR‹šóüspzßÊn ÀTHS R¥û·¦8}¾…у͙Àe;¹_;zòohûöï ¤ÉìúxÚA `„ 4láŒãxù-Ä íZø; §õïH7_y<ˆØ®5ü}z4çxF•b%›1›¢svÑ–˜Í¤d,ªiŠ*üÎo®L¹l_%:;úÓßiã–ÝÚH“Pç{}DïvºDTó*å óÈr:FyVû@Ê 2LX!"kA@7irƒ^¯Eì!ø-ìå9ŸÚß̇áž©‘²ð¬àJ;e5ÛцÉo!0½aÂÌ™¬UiZËg>ñú·ZšÝzP›J˜ìÒh.<£û!¸Ý`žmP•Œr”’œLe˦òÈŸêÔç« ‡ÀtQ¦Ìp¤)i‚€ $.BšâøÙ—ç˜"ßÿü/Õæi ®¾¨O‰’`>¨¡÷Õ&á½3È”p˜Æ(‡ æ·ƒ¦óØýÂóÓaz† O릙å Éyõ‘‹4ÒsÇ…iúbÒ-€^`Z¿µÑzÀœV×_Ú—yùK:iðËÚÔ ˜š¥OÚéxòÏÖ…øã/ÿä©[ú˜Nx©ÒçzÿÃþ²r(±¬Ìš+ä“oB€æR@1R¯FÕŠÔ±u¦63z°©h™OIKA ¾ÒÇϯ2;MC›2ž°1B¬'Oƒ²}Wýðë"úeæRÍúÑÛÏ9mLpuX[(0›!ÖK¯.M´CžÞ]›ØwÚ‹5uÝ%Ç»dØUýéº{ÇÐí¢ÍÞï¨VÅŽ{õãHv.G>ÞFˆÿtøpŽæ¥¿×Ôé‹ô?‹mƒèÁ¹]DA@…€{ß¡CJ‡JEŽG+îxvìÚOàˆhßziÛ¢.=xËiÔ¥]hÿ ¿ç¯¡»³ ¯aâ«Ë1­ÛÿrÊ?¬mšIW]Ø›ú÷Ž|4b¼Î ’˜¨þRëyÖõädå/¯:Êí4Ty‚NŒA@B! 
¤)B1|üÎ'>¡ù‹7ð$“7д?–hÚ–òl–:Š5Cmš×áœKÖA@øC@Ìsñ÷Ì s|ù¹GÓ{Ÿü¦™ÐN;¡Cá~ÙA@ð# š¦ðc*) ‚€ ‚@)DÀ8¢a),¨IA@7irƒž\+‚€  ƒ€¦„yÔRPA@A@pƒ€&7èɵ‚€ ‚€ 0iJ˜G-A@7irƒž\+‚€  ƒ€¦„yÔRPA@A@pƒ€&7èɵ‚€ ‚€ 0iJ˜G-A@7irƒž\+‚€  ƒ€¦„yÔRPA@A@pƒ€&7èɵ‚€ ‚€ 0iJ˜G-A@7irƒž\+‚€  ƒ€¦„yÔRPA@A@pƒ€&7èɵ‚€ ‚€ 0iJ˜G-A@7irƒž\+‚€  ƒ€¦„yÔRPA@A@pƒ€&7èɵ‚€ ‚€ 0iJ˜G-A@7irƒž\+‚€  ƒ€¦„yÔRPA@A@pƒ€&7èɵ‚€ ‚€ 0iJ˜G-A@7irƒž\+‚€  ƒ€¦„yÔRPA@A@pƒ€&7èɵ‚€ ‚€ 0iJ˜G-A@7irƒž\+‚€  ƒ€¦„yÔRPA@A@pƒ€&7èɵ‚€ ‚€ 0iJ˜G-A@7irƒž\+‚€  ƒ€¦„yÔRPA@A@pƒ€&7èɵ‚€ ‚€ 0ü(xð“‘×IªIEND®B`‚doc/index.md000066400000000000000000000036461316246254300132730ustar00rootroot00000000000000 # Torch Package Reference Manual # [![Join the chat at https://gitter.im/torch/torch7](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/torch/torch7?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Build Status](https://travis-ci.org/torch/torch7.svg)](https://travis-ci.org/torch/torch7) __Torch__ is the main package in [Torch7](http://torch.ch) where data structures for multi-dimensional tensors and mathematical operations over these are defined. Additionally, it provides many utilities for accessing files, serializing objects of arbitrary types and other useful utilities. ## Torch Packages ## * Tensor Library * [Tensor](tensor.md) defines the _all powerful_ tensor object that provides multi-dimensional numerical arrays with type templating. * [Mathematical operations](maths.md) that are defined for the tensor object types. * [Storage](storage.md) defines a simple storage interface that controls the underlying storage for any tensor object. * File I/O Interface Library * [File](file.md) is an abstract interface for common file operations. * [Disk File](diskfile.md) defines operations on files stored on disk. * [Memory File](memoryfile.md) defines operations on stored in RAM. * [Pipe File](pipefile.md) defines operations for using piped commands. * [High-Level File operations](serialization.md) defines higher-level serialization functions. 
* Useful Utilities * [Timer](timer.md) provides functionality for _measuring time_. * [Tester](tester.md) is a generic tester framework. * [CmdLine](cmdline.md) is a command line argument parsing utility. * [Random](random.md) defines a random number generator package with various distributions. * Finally useful [utility](utility.md) functions are provided for easy handling of torch tensor types and class inheritance. doc/maths.md000077500000000000000000002726451316246254300133120ustar00rootroot00000000000000 # Math Functions # Torch provides MATLAB-like functions for manipulating [`Tensor`](tensor.md) objects. Functions fall into several types of categories: * [Constructors](#torch.construction.dok) like [`zeros`](#torch.zeros), [`ones`](#torch.ones); * Extractors like [`diag`](#torch.diag) and [`triu`](#torch.triu); * [Element-wise](#torch.elementwise.dok) mathematical operations like [`abs`](#torch.abs) and [`pow`](#torch.pow); * [BLAS](#torch.basicoperations.dok) operations; * [Column or row-wise operations](#torch.columnwise.dok) like [`sum`](#torch.sum) and [`max`](#torch.max); * [Matrix-wide operations](#torch.matrixwide.dok) like [`trace`](#torch.trace) and [`norm`](#torch.norm); * [Convolution and cross-correlation](#torch.conv.dok) operations like [`conv2`](#torch.conv2); * [Basic linear algebra operations](#torch.linalg.dok) like [`eig`](#torch.eig); * [Logical operations](#torch.logical.dok) on `Tensor`s. By default, all operations allocate a new `Tensor` to return the result. However, all functions also support passing the target `Tensor`(s) as the first argument(s), in which case the target `Tensor`(s) will be resized accordingly and filled with result. This property is especially useful when one wants have tight control over when memory is allocated. 
The *Torch* package adopts the same concept, so that calling a function directly on the `Tensor` itself using an object-oriented syntax is equivalent to passing the `Tensor` as the optional resulting `Tensor`. The following two calls are equivalent. ```lua torch.log(x, x) x:log() ``` Similarly, `torch.conv2` function can be used in the following manner. ```lua > x = torch.rand(100, 100) > k = torch.rand(10, 10) > res1 = torch.conv2(x, k) -- case 1 > res2 = torch.Tensor() > torch.conv2(res2, x, k) -- case 2 > res2:dist(res1) 0 ``` The advantage of second case is, same `res2` `Tensor` can be used successively in a loop without any new allocation. ```lua -- no new memory allocations... > for i = 1, 100 do torch.conv2(res2, x, k) end > res2:dist(res1) 0 ``` ## Construction or extraction functions ## ### [res] torch.cat( [res,] x_1, x_2, [dimension] ) ### ### [res] torch.cat( [res,] {x_1, x_2, ...}, [dimension] ) ### `x = torch.cat(x_1, x_2, [dimension])` returns a `Tensor` `x` which is the concatenation of `Tensor`s `x_1` and `x_2` along dimension `dimension`. If `dimension` is not specified or if it is `-1`, it is the maximum last dimension over all input tensors, except if all tensors are empty, then it is `1`. The other dimensions of `x_1` and `x_2` have to be equal. Also supports arrays with arbitrary numbers of `Tensor`s as inputs. Empty tensors are ignored during catting, and thus do not throw an error. Performing cat on empty tensors only will always result in an empty tensor. 
Examples: ```lua > torch.cat(torch.ones(3), torch.zeros(2)) 1 1 1 0 0 [torch.DoubleTensor of size 5] > torch.cat(torch.ones(3, 2), torch.zeros(2, 2), 1) 1 1 1 1 1 1 0 0 0 0 [torch.DoubleTensor of size 5x2] > torch.cat(torch.ones(2, 2), torch.zeros(2, 2), 1) 1 1 1 1 0 0 0 0 [torch.DoubleTensor of size 4x2] > torch.cat(torch.ones(2, 2), torch.zeros(2, 2), 2) 1 1 0 0 1 1 0 0 [torch.DoubleTensor of size 2x4] > torch.cat(torch.cat(torch.ones(2, 2), torch.zeros(2, 2), 1), torch.rand(3, 2), 1) 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.3227 0.0493 0.9161 0.1086 0.2206 0.7449 [torch.DoubleTensor of size 7x2] > torch.cat({torch.ones(2, 2), torch.zeros(2, 2), torch.rand(3, 2)}, 1) 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.3227 0.0493 0.9161 0.1086 0.2206 0.7449 [torch.DoubleTensor of size 7x2] > torch.cat({torch.Tensor(), torch.rand(3, 2)}, 1) 0.3227 0.0493 0.9161 0.1086 0.2206 0.7449 [torch.DoubleTensor of size 3x2] ``` ### [res] torch.diag([res,] x [,k]) ### `y = torch.diag(x)` when `x` is of dimension 1 returns a diagonal matrix with diagonal elements constructed from `x`. `y = torch.diag(x)` when `x` is of dimension 2 returns a `Tensor` of dimension 1 with elements constructed from the diagonal of `x`. `y = torch.diag(x, k)` returns the k-th diagonal of `x`, where `k = 0` is the main diagonal, `k > 0` is above the main diagonal and `k < 0` is below the main diagonal. ### [res] torch.eye([res,] n [,m]) ### `y = torch.eye(n)` returns the `n × n` identity matrix. `y = torch.eye(n, m)` returns an `n × m` identity matrix with ones on the diagonal and zeros elsewhere. ### [res] torch.histc([res,] x [,nbins, min_value, max_value]) ### `y = torch.histc(x)` returns the histogram of the elements in `x`. By default the elements are sorted into 100 equally spaced bins between the minimum and maximum values of `x`. `y = torch.histc(x, n)` same as above with `n` bins. 
`y = torch.histc(x, n, min, max)` same as above with `n` bins and `[min, max]` as elements range. ### [res] torch.bhistc([res,] x [,nbins, min_value, max_value]) ### `y = torch.bhistc(x)` returns the histogram of the elements in 2d tensor `x` along the last dimension. By default the elements are sorted into 100 equally spaced bins between the minimum and maximum values of `x`. `y = torch.bhistc(x, n)` same as above with `n` bins. `y = torch.bhistc(x, n, min, max)` same as above with `n` bins and `[min, max]` as elements range. ```lua x = torch.Tensor(3, 6) > x[1] = torch.Tensor{ 2, 4, 2, 2, 5, 4 } > x[2] = torch.Tensor{ 3, 5, 1, 5, 3, 5 } > x[3] = torch.Tensor{ 3, 4, 2, 5, 5, 1 } > x 2 4 2 2 5 4 3 5 1 5 3 5 3 4 2 5 5 1 [torch.DoubleTensor of size 3x6] > torch.bhistc(x, 5, 1, 5) 0 3 0 2 1 1 0 2 0 3 1 1 1 1 2 [torch.DoubleTensor of size 3x5] > y = torch.Tensor(1, 6):copy(x[1]) > torch.bhistc(y, 5) 3 0 2 0 1 [torch.DoubleTensor of size 1x5] ``` ### [res] torch.linspace([res,] x1, x2, [,n]) ### `y = torch.linspace(x1, x2)` returns a one-dimensional `Tensor` of size 100 equally spaced points between `x1` and `x2`. `y = torch.linspace(x1, x2, n)` returns a one-dimensional `Tensor` of `n` equally spaced points between `x1` and `x2`. ### [res] torch.logspace([res,] x1, x2, [,n]) ### `y = torch.logspace(x1, x2)` returns a one-dimensional `Tensor` of `100` logarithmically eqally spaced points between `10^x1` and `10^x2`. `y = torch.logspace(x1, x2, n)` returns a one-dimensional `Tensor` of `n` logarithmically equally spaced points between `10^x1` and `10^x2`. ### [res] torch.multinomial([res,], p, n, [,replacement]) ### `y = torch.multinomial(p, n)` returns a `Tensor` `y` where each row contains `n` indices sampled from the [multinomial probability distribution](http://en.wikipedia.org/wiki/Multinomial_distribution) located in the corresponding row of `Tensor` `p`. 
The rows of `p` do not need to sum to one (in which case we use the values as weights), but must be non-negative and have a non-zero sum. Indices are ordered from left to right according to when each was sampled (first samples are placed in first column). If `p` is a vector, `y` is a vector size `n`. If `p` is a m-rows matrix, `y` is an `m × n` matrix. If `replacement` is `true`, samples are drawn **with replacement**. If not, they are drawn **without replacement**, which means that when a sample index is drawn for a row, it cannot be drawn again for that row. This implies the constraint that `n` must be lower than `p` length (or number of columns of `p` if it is a matrix). The default value for `replacement` is `false`. ```lua p = torch.Tensor{1, 1, 0.5, 0} a = torch.multinomial(p, 10000, true) > a ... [torch.LongTensor of dimension 10000] > for i = 1, 4 do print(a:eq(i):sum()) end 3967 4016 2017 0 ``` Note: If you use the function with a given result `Tensor`, i.e. of the function prototype: `torch.multinomial(res, p, n [, replacement])` then you will have to call it slightly differently as: ```lua p.multinomial(res, p, n, replacement) -- p.multinomial instead of torch.multinomial ``` This is due to the fact that the result here is of a `LongTensor` type, and we do not define a `torch.multinomial` over long `Tensor`s. ### [state] torch.multinomialAliasSetup(probs) ### ### [res] torch.multinomialAlias(output, state) `state = torch.multinomialAliasSetup(probs)` returns a table `state` consisting of two `tensors` : `probability table` and an `alias table`. This is required once for each `probs` vectors. We can then sample from the multinomial distribution multiple times by consulting these tensors `state` table. `torch.multinomialAlias(output, state)` returns `output` filled with indices drawn from the multinomial distribution `probs`. `output` itself is filled with the indices and it is not necessary to get the return value of the statement. 
The sampling is done through a technique defined in a very simple way in this blog about [The Alias Method](https://hips.seas.harvard.edu/blog/2013/03/03/the-alias-method-efficient-sampling-with-many-discrete-outcomes/). The paper that describes this technique is present [here](http://www.tandfonline.com/doi/abs/10.1080/00031305.1979.10482697). This can only sample with replacement. The `output` `Tensor` that is fed into the `multinomialAlias` method need not be contiguous. The `output` tensor can only be a 1d tensor. If you are required to fill a nd tensor enter a 1d view of the same tensor. This method is exceptionally faster than `torch.multinomial` when you want to sample a lot of samples from the same distrbution or sample from the same distribution a large number of times. `torch.multinomial` is faster for sampling few samples from a distribution once because the `multinomialAliasSetup` method takes some time in this case. To see and compare how these two methods differ in speed run `th test/test_aliasMultinomial.lua`. ```lua > state = torch.multinomialAliasSetup(probs) > state { 1 : LongTensor - size: 4 2 : DoubleTensor - size: 4 } > output = torch.LongTensor(2,3) > torch.multinomialAlias(output:view(-1), state) 4 1 2 3 2 2 [torch.LongTensor of size 6] > output 4 1 2 3 2 2 [torch.LongTensor of size 2x3] ``` You can also allocate memory and reuse it for the state table. ```lua > state = {torch.LongTensor(), torch.DoubleTensor()} > probs = torch.DoubleTensor({0.2, 0.3, 0.5}) > state = torch.multinomialAliasSetup(probs, state) > state { 1 : LongTensor - size: 3 2 : DoubleTensor - size: 3 } > output = torch.LongTensor(7) > torch.multinomialAlias(output, state) 2 2 3 1 2 2 2 [torch.LongTensor of size 7] ``` ### [res] torch.ones([res,] m [,n...]) ### `y = torch.ones(n)` returns a one-dimensional `Tensor` of size `n` filled with ones. `y = torch.ones(m, n)` returns a `m × n` `Tensor` filled with ones. 
For more than `4` dimensions, you can use a storage as argument: `y = torch.ones(torch.LongStorage{m, n, k, l, o})`. ### [res] torch.rand([res,] [gen,] m [,n...]) ### `y = torch.rand(n)` returns a one-dimensional `Tensor` of size `n` filled with random numbers from a uniform distribution on the interval `[0, 1)`. `y = torch.rand(m, n)` returns a `m × n` `Tensor` of random numbers from a uniform distribution on the interval `[0, 1)`. For more than 4 dimensions, you can use a storage as argument: `y = torch.rand(torch.LongStorage{m, n, k, l, o})`. `y = torch.rand(gen, m, n)` returns a `m × n` `Tensor` of random numbers from a uniform distribution on the interval `[0, 1)`, using a non-global random number generator `gen` created by [torch.Generator()](random.md#torch.Generator). ### [res] torch.randn([res,] [gen,] m [,n...]) ### `y = torch.randn(n)` returns a one-dimensional `Tensor` of size `n` filled with random numbers from a normal distribution with mean zero and variance one. `y = torch.randn(m, n)` returns a `m × n` `Tensor` of random numbers from a normal distribution with mean zero and variance one. For more than 4 dimensions, you can use a storage as argument: `y = torch.randn(torch.LongStorage{m, n, k, l, o})`. `y = torch.randn(gen, m, n)` returns a `m × n` `Tensor` of random numbers from a normal distribution with mean zero and variance one, using a non-global random number generator `gen` created by [torch.Generator()](random.md#torch.Generator). ### [res] torch.range([res,] x, y [,step]) ### `y = torch.range(x, y)` returns a `Tensor` of size `floor((y - x) / step) + 1` with values from `x` to `y` with step `step` (default to 1). ```lua > torch.range(2, 5) 2 3 4 5 [torch.DoubleTensor of size 4] > torch.range(2, 5, 1.2) 2.0000 3.2000 4.4000 [torch.DoubleTensor of size 3] ``` ### [res] torch.randperm([res,] [gen,] n) ### `y = torch.randperm(n)` returns a random permutation of integers from 1 to `n`. 
`y = torch.randperm(gen, n)` returns a random permutation of integers from 1 to `n`, using a non-global random number generator `gen` created by [torch.Generator()](random.md#torch.Generator). ### [res] torch.reshape([res,] x, m [,n...]) ### `y = torch.reshape(x, m, n)` returns a new `m × n` `Tensor` y whose elements are taken rowwise from `x`, which must have `m * n` elements. The elements are copied into the new `Tensor`. For more than 4 dimensions, you can use a storage: `y = torch.reshape(x, torch.LongStorage{m, n, k, l, o})`. ### [res] torch.tril([res,] x [,k]) ### `y = torch.tril(x)` returns the lower triangular part of `x`, the other elements of `y` are set to 0. `torch.tril(x, k)` returns the elements on and below the k-th diagonal of `x` as non-zero. `k = 0` is the main diagonal, `k > 0` is above the main diagonal and `k < 0` is below the main diagonal. ### [res] torch.triu([res,] x, [,k]) ### `y = torch.triu(x)` returns the upper triangular part of `x`, the other elements of `y` are set to 0. `torch.triu(x, k)` returns the elements on and above the k-th diagonal of `x` as non-zero. `k = 0` is the main diagonal, `k > 0` is above the main diagonal and `k < 0` is below the main diagonal. ### [res] torch.zeros([res,] x) ### `y = torch.zeros(n)` returns a one-dimensional `Tensor` of size n filled with zeros. `y = torch.zeros(m, n)` returns a `m × n` `Tensor` filled with zeros. For more than 4 dimensions, you can use a storage: `y = torch.zeros(torch.LongStorage{m, n, k, l, o})`. ## Element-wise Mathematical Operations ## ### [res] torch.abs([res,] x) ### `y = torch.abs(x)` returns a new `Tensor` with the absolute values of the elements of `x`. `x:abs()` replaces all elements in-place with the absolute values of the elements of `x`. ### [res] torch.sign([res,] x) ### `y = torch.sign(x)` returns a new `Tensor` with the sign (`+/- 1`) of the elements of `x`. `x:sign()` replaces all elements in-place with the sign of the elements of `x`. 
### [res] torch.acos([res,] x) ###

`y = torch.acos(x)` returns a new `Tensor` with the arccosine of the elements of `x`.

`x:acos()` replaces all elements in-place with the arccosine of the elements of `x`.

### [res] torch.asin([res,] x) ###

`y = torch.asin(x)` returns a new `Tensor` with the arcsine of the elements of `x`.

`x:asin()` replaces all elements in-place with the arcsine of the elements of `x`.

### [res] torch.atan([res,] x) ###

`y = torch.atan(x)` returns a new `Tensor` with the arctangent of the elements of `x`.

`x:atan()` replaces all elements in-place with the arctangent of the elements of `x`.

### [res] torch.atan2([res,] x, y) ###

`y = torch.atan2(x, y)` returns a new `Tensor` with the arctangent of the elements of `x` and `y`.
Note that the arctangent of the elements `x` and `y` refers to the signed angle in radians between the rays ending at origin where the first one starts at (1, 0) and the second at (y, x).

`x:atan2()` replaces all elements in-place with the arctangent of the elements of `x` and `y`.

### [res] torch.ceil([res,] x) ###

`y = torch.ceil(x)` returns a new `Tensor` with the values of the elements of `x` rounded up to the nearest integers.

`x:ceil()` replaces all elements in-place with the values of the elements of `x` rounded up to the nearest integers.

### [res] torch.cos([res,] x) ###

`y = torch.cos(x)` returns a new `Tensor` with the cosine of the elements of `x`.

`x:cos()` replaces all elements in-place with the cosine of the elements of `x`.

### [res] torch.cosh([res,] x) ###

`y = torch.cosh(x)` returns a new `Tensor` with the hyperbolic cosine of the elements of `x`.

`x:cosh()` replaces all elements in-place with the hyperbolic cosine of the elements of `x`.
### [res] torch.floor([res,] x) ### `y = torch.floor(x)` returns a new `Tensor` with the values of the elements of `x` rounded down to the nearest integers. `x:floor()` replaces all elements in-place with the values of the elements of `x` rounded down to the nearest integers. ### [res] torch.log([res,] x) ### `y = torch.log(x)` returns a new `Tensor` with the natural logarithm of the elements of `x`. `x:log()` replaces all elements in-place with the natural logarithm of the elements of `x`. ### [res] torch.log1p([res,] x) ### `y = torch.log1p(x)` returns a new `Tensor` with the natural logarithm of the elements of `x + 1`. `x:log1p()` replaces all elements in-place with the natural logarithm of the elements of `x + 1`. This function is more accurate than [`log`](#torch.log) for small values of `x`. ### x:neg() ### `x:neg()` replaces all elements in-place with the sign-reversed values of the elements of `x`. ### x:cinv() ### `x:cinv()` replaces all elements in-place with `1.0 / x`. ### [res] torch.pow([res,] x, n) ### Let `x` be a `Tensor` and `n` a number. `y = torch.pow(x, n)` returns a new `Tensor` with the elements of `x` to the power of `n`. `y = torch.pow(n, x)` returns, a new `Tensor` with `n` to the power of the elements of `x`. `x:pow(n)` replaces all elements in-place with the elements of `x` to the power of `n`. `torch.pow(x, n, x)` replaces all elements in-place with `n` to the power of the elements of `x`. ### [res] torch.round([res,] x) ### `y = torch.round(x)` returns a new `Tensor` with the values of the elements of `x` rounded to the nearest integers. `x:round()` replaces all elements in-place with the values of the elements of `x` rounded to the nearest integers. ### [res] torch.sin([res,] x) ### `y = torch.sin(x)` returns a new `Tensor` with the sine of the elements of `x`. `x:sin()` replaces all elements in-place with the sine of the elements of `x`. 
### [res] torch.sinh([res,] x) ### `y = torch.sinh(x)` returns a new `Tensor` with the hyperbolic sine of the elements of `x`. `x:sinh()` replaces all elements in-place with the hyperbolic sine of the elements of `x`. ### [res] torch.sqrt([res,] x) ### `y = torch.sqrt(x)` returns a new `Tensor` with the square root of the elements of `x`. `x:sqrt()` replaces all elements in-place with the square root of the elements of `x`. ### [res] torch.rsqrt([res,] x) ### `y = torch.rsqrt(x)` returns a new `Tensor` with the reciprocal of the square root of the elements of `x`. `x:rsqrt()` replaces all elements in-place with the reciprocal of the square root of the elements of `x`. ### [res] torch.tan([res,] x) ### `y = torch.tan(x)` returns a new `Tensor` with the tangent of the elements of `x`. `x:tan()` replaces all elements in-place with the tangent of the elements of `x`. ### [res] torch.tanh([res,] x) ### `y = torch.tanh(x)` returns a new `Tensor` with the hyperbolic tangent of the elements of `x`. `x:tanh()` replaces all elements in-place with the hyperbolic tangent of the elements of `x`. ### [res] torch.sigmoid([res,] x) ### `y = torch.sigmoid(x)` returns a new `Tensor` with the sigmoid of the elements of `x`. `x:sigmoid()` replaces all elements in-place with the sigmoid of the elements of `x`. ### [res] torch.trunc([res,] x) ### `y = torch.trunc(x)` returns a new `Tensor` with the truncated integer values of the elements of `x`. `x:trunc()` replaces all elements in-place with the truncated integer values of the elements of `x`. ### [res] torch.frac([res,] x) ### `y = torch.frac(x)` returns a new `Tensor` with the fractional portion of the elements of `x`. `x:frac()` replaces all elements in-place with the fractional portion of the elements of `x`. ## Basic operations ## In this section, we explain basic mathematical operations for `Tensor`s. 
### [boolean] equal([tensor1,] tensor2) ### Returns `true` iff the dimensions and values of `tensor1` and `tensor2` are exactly the same. ```lua x = torch.Tensor{1,2,3} y = torch.Tensor{1,2,3} > x:equal(y) true y = torch.Tensor{1,2,4} > x:equal(y) false ``` Note that `a:equal(b)` is more efficient that `a:eq(b):all()` as it avoids allocation of a temporary tensor and can short-circuit. ### [res] torch.add([res,] tensor, value) ### Add the given value to all elements in the `Tensor`. `y = torch.add(x, value)` returns a new `Tensor`. `x:add(value)` add `value` to all elements in place. ### [res] torch.add([res,] tensor1, tensor2) ### Add `tensor1` to `tensor2` and put result into `res`. The number of elements must match, but sizes do not matter. ```lua > x = torch.Tensor(2, 2):fill(2) > y = torch.Tensor(4):fill(3) > x:add(y) > x 5 5 5 5 [torch.DoubleTensor of size 2x2] ``` `y = torch.add(a, b)` returns a new `Tensor`. `torch.add(y, a, b)` puts `a + b` in `y`. `a:add(b)` accumulates all elements of `b` into `a`. `y:add(a, b)` puts `a + b` in `y`. ### [res] torch.add([res,] tensor1, value, tensor2) ### Multiply elements of `tensor2` by the scalar `value` and add it to `tensor1`. The number of elements must match, but sizes do not matter. ```lua > x = torch.Tensor(2, 2):fill(2) > y = torch.Tensor(4):fill(3) > x:add(2, y) > x 8 8 8 8 [torch.DoubleTensor of size 2x2] ``` `x:add(value, y)` multiply-accumulates values of `y` into `x`. `z:add(x, value, y)` puts the result of `x + value * y` in `z`. `torch.add(x, value, y)` returns a new `Tensor` `x + value * y`. `torch.add(z, x, value, y)` puts the result of `x + value * y` in `z`. ### tensor:csub(value) ### Subtracts the given value from all elements in the `Tensor`, in place. ### tensor:csub(tensor2) ### Subtracts `tensor2` from `tensor`, in place. The number of elements must match, but sizes do not matter. 
```lua > x = torch.Tensor(2, 2):fill(8) > y = torch.Tensor(4):fill(3) > x:csub(y) > x 5 5 5 5 [torch.DoubleTensor of size 2x2] ``` `a:csub(b)` put `a - b` into `a`. ### [res] torch.mul([res,] tensor1, value) ### Multiply all elements in the `Tensor` by the given `value`. `z = torch.mul(x, 2)` will return a new `Tensor` with the result of `x * 2`. `torch.mul(z, x, 2)` will put the result of `x * 2` in `z`. `x:mul(2)` will multiply all elements of `x` with `2` in-place. `z:mul(x, 2)` will put the result of `x * 2` in `z`. ### [res] torch.clamp([res,] tensor, min_value, max_value) ### Clamp all elements in the `Tensor` into the range `[min_value, max_value]`. ie: ``` ⎧ min_value, if x_i < min_value y_i = ⎨ x_i, if min_value ≤ x_i ≤ max_value ⎩ max_value, if x_i > max_value ``` `z = torch.clamp(x, 0, 1)` will return a new `Tensor` with the result of `x` bounded between `0` and `1`. `torch.clamp(z, x, 0, 1)` will put the result in `z`. `x:clamp(0, 1)` will perform the clamp operation in place (putting the result in `x`). `z:clamp(x, 0, 1)` will put the result in `z`. ### [res] torch.cmul([res,] tensor1, tensor2) ### Element-wise multiplication of `tensor1` by `tensor2`. The number of elements must match, but sizes do not matter. ```lua > x = torch.Tensor(2, 2):fill(2) > y = torch.Tensor(4):fill(3) > x:cmul(y) > = x 6 6 6 6 [torch.DoubleTensor of size 2x2] ``` `z = torch.cmul(x, y)` returns a new `Tensor`. `torch.cmul(z, x, y)` puts the result in `z`. `y:cmul(x)` multiplies all elements of `y` with corresponding elements of `x`. `z:cmul(x, y)` puts the result in `z`. ### [res] torch.cpow([res,] tensor1, tensor2) ### Element-wise power operation, taking the elements of `tensor1` to the powers given by elements of `tensor2`. The number of elements must match, but sizes do not matter. ```lua > x = torch.Tensor(2, 2):fill(2) > y = torch.Tensor(4):fill(3) > x:cpow(y) > x 8 8 8 8 [torch.DoubleTensor of size 2x2] ``` `z = torch.cpow(x, y)` returns a new `Tensor`. 
`torch.cpow(z, x, y)` puts the result in `z`. `y:cpow(x)` takes all elements of `y` to the powers given by the corresponding elements of `x`. `z:cpow(x, y)` puts the result in `z`. ### [res] torch.addcmul([res,] x [,value], tensor1, tensor2) ### Performs the element-wise multiplication of `tensor1` by `tensor2`, multiply the result by the scalar `value` (1 if not present) and add it to `x`. The number of elements must match, but sizes do not matter. ```lua > x = torch.Tensor(2, 2):fill(2) > y = torch.Tensor(4):fill(3) > z = torch.Tensor(2, 2):fill(5) > x:addcmul(2, y, z) > x 32 32 32 32 [torch.DoubleTensor of size 2x2] ``` `z:addcmul(value, x, y)` accumulates the result in `z`. `torch.addcmul(z, value, x, y)` returns a new `Tensor` with the result. `torch.addcmul(z, z, value, x, y)` puts the result in `z`. ### [res] torch.div([res,] tensor, value) ### Divide all elements in the `Tensor` by the given `value`. `z = torch.div(x, 2)` will return a new `Tensor` with the result of `x / 2`. `torch.div(z, x, 2)` will put the result of `x / 2` in `z`. `x:div(2)` will divide all elements of `x` with `2` in-place. `z:div(x, 2)` puts the result of `x / 2` in `z`. ### [res] torch.cdiv([res,] tensor1, tensor2) ### Performs the element-wise division of `tensor1` by `tensor2`. The number of elements must match, but sizes do not matter. ```lua > x = torch.Tensor(2, 2):fill(1) > y = torch.range(1, 4) > x:cdiv(y) > x 1.0000 0.5000 0.3333 0.2500 [torch.DoubleTensor of size 2x2] ``` `z = torch.cdiv(x, y)` returns a new `Tensor`. `torch.cdiv(z, x, y)` puts the result in `z`. `y:cdiv(x)` divides all elements of `y` with corresponding elements of `x`. `z:cdiv(x, y)` puts the result in `z`. ### [res] torch.lshift([res,] tensor, value) ### Left shift all elements in the `Tensor` by the given `value`. `z = torch.lshift(x, 2)` will return a new `Tensor` with the result of `x << 2`. `torch.lshift(z, x, 2)` will put the result of `x << 2` in `z`. 
`x:lshift(2)` will perform left shift operation all elements of `x` by `2` bits. `z:lshift(x, 2)` puts the result of `x << 2` in `z`. Note: For float type tensors, `x:lshift(value)` evaluates `x:mul(math.pow(2, value))` internally. ### [res] torch.clshift([res,] tensor1, tensor2) ### Performs the left shift operation of each element in `tensor1` by each element in `tensor2`. The number of elements must match, but sizes do not matter. ```lua > x = torch.LongTensor(2, 2):fill(1) > y = torch.LongTensor(2, 2):range(1, 4) > x:clshift(y) > x 2 4 8 16 [torch.LongTensor of size 2x2] ``` `z = torch.clshift(x, y)` returns a new `Tensor`. `torch.clshift(z, x, y)` puts the result in `z`. `y:clshift(x)` left shifts all elements of `y` with corresponding elements of `x`. `z:clshift(x, y)` puts the result in `z`. ### [res] torch.rshift([res,] tensor, value) ### Right shift all elements in the `Tensor` by the given `value`. `z = torch.rshift(x, 2)` will return a new `Tensor` with the result of `x >> 2`. `torch.rshift(z, x, 2)` will put the result of `x >> 2` in `z`. `x:rshift(2)` will perform right shift operation all elements of `x` by `2` bits. `z:rshift(x, 2)` puts the result of `x >> 2` in `z`. Note: For float type tensors, `x:lshift(value)` evaluates `x:div(math.pow(2, value))` internally. ### [res] torch.crshift([res,] tensor1, tensor2) ### Performs the right shift operation of each element in `tensor1` by each element in `tensor2`. The number of elements must match, but sizes do not matter. ```lua > x = torch.LongTensor(2, 2):fill(32) > y = torch.LongTensor(2, 2):range(1, 4) > x:crshift(y) > x 16 8 4 2 [torch.LongTensor of size 2x2] ``` `z = torch.crshift(x, y)` returns a new `Tensor`. `torch.crshift(z, x, y)` puts the result in `z`. `y:crshift(x)` right shifts all elements of `y` with corresponding elements of `x`. `z:crshift(x, y)` puts the result in `z`. 
### [res] torch.addcdiv([res,] x [,value], tensor1, tensor2) ### Performs the element-wise division of `tensor1` by `tensor2`, multiply the result by the scalar `value` and add it to `x`. The number of elements must match, but sizes do not matter. ```lua > x = torch.Tensor(2, 2):fill(1) > y = torch.range(1, 4) > z = torch.Tensor(2, 2):fill(5) > x:addcdiv(2, y, z) > x 1.4000 1.8000 2.2000 2.6000 [torch.DoubleTensor of size 2x2] ``` `z:addcdiv(value, x, y)` accumulates the result in `z`. `torch.addcdiv(z, value, x, y)` returns a new `Tensor` with the result. `torch.addcdiv(z, z, value, x, y)` puts the result in `z`. ### [res] torch.fmod([res,] tensor, value) ### Computes remainder of division (rounded towards zero) of all elements in the `Tensor` by `value`. This works both for integer and floating point numbers. It behaves the same as Lua bulit-in function `math.fmod()` and a little bit different from `torch.remainder()` and `%` operator. For example: ```lua > x = torch.Tensor({-3, 3}) > torch.fmod(x, 2) -1 1 [torch.DoubleTensor of size 2] > torch.fmod(x, -2) -1 1 [torch.DoubleTensor of size 2] > torch.remainder(x, 2) 1 1 [torch.DoubleTensor of size 2] > torch.remainder(x, -2) -1 -1 [torch.DoubleTensor of size 2] ``` `z = torch.fmod(x, 2)` will return a new `Tensor` with the result of `math.fmod(x, 2)`. `torch.fmod(z, x, 2)` will put the result of `math.fmod(x, 2)` in `z`. `x:fmod(2)` will replace all elements of `x` the result of `math.fmod(x, 2)` in-place. `z:fmod(x, 2)` puts the result of `math.fmod(x, 2)` in `z`. ### [res] torch.remainder([res,] tensor, value) ### Computes remainder of division (rounded to nearest) of all elements in the `Tensor` by `value`. This works both for integer and floating point numbers. It behaves the same as `%` operator and can be expressed as `a % b = a - b * floor(a/b)`. See `torch.fmod()` for comparison. `z = torch.remainder(x, 2)` will return a new `Tensor` with the result of `x % 2`. 
`torch.remainder(z, x, 2)` will put the result of `x % 2` in `z`. `x:remainder(2)` will replace all elements of `x` the result of `x % 2` in-place. `z:remainder(x, 2)` puts the result of `x % 2` in `z`. ### [res] torch.mod([res,] tensor, value) ### This function is deprecated and exists only for compatibility with previous versions. Please use `torch.fmod()` or `torch.remainder()` instead. ### [res] torch.cfmod([res,] tensor1, tensor2) ### Computes the element-wise remainder of the division (rounded towards zero) of `tensor1` by `tensor2`. The number of elements must match, but sizes do not matter. ```lua > x = torch.Tensor({{3, 3}, {-3, -3}}) > y = torch.Tensor({{2, -2}, {2, -2}}) > x:cfmod(y) 1 1 -1 -1 [torch.DoubleTensor of size 2x2] ``` `z = torch.cfmod(x, y)` returns a new `Tensor`. `torch.cfmod(z, x, y)` puts the result in `z`. `y:cfmod(x)` replaces all elements of `y` by their remainders of division (rounded towards zero) by corresponding elements of `x`. `z:cfmod(x, y)` puts the result in `z`. ### [res] torch.cremainder([res,] tensor1, tensor2) ### Computes element-wise remainder of the division (rounded to nearest) of `tensor1` by `tensor2`. The number of elements must match, but sizes do not matter. ```lua > x = torch.Tensor({{3, 3}, {-3, -3}}) > y = torch.Tensor({{2, -2}, {2, -2}}) > x:cfmod(y) 1 1 -1 -1 [torch.DoubleTensor of size 2x2] ``` `z = torch.cremainder(x, y)` returns a new `Tensor`. `torch.cremainder(z, x, y)` puts the result in `z`. `y:cremainder(x)` replaces all elements of `y` by their remainders of division (rounded to nearest) by corresponding elements of `x`. `z:cremainder(x, y)` puts the result in `z`. ### [res] torch.cmod([res,] tensor1, tensor2) ### This function is deprecated and exists only for compatibility with previous versions. Please use `torch.cfmod()` or `torch.cremainder()` instead. ### [res] torch.bitand([res,] tensor, value) ### Performs bitwise `and` operation on all elements in the `Tensor` by the given `value`. 
`z = torch.bitand(x, value)` will return a new `Tensor` with the result of `x & value`. `torch.bitand(z, x, value)` will put the result of `x & value` in `z`. `x:bitand(value)` will perform right shift operation all elements of `x` by `value` bits. `z:bitand(x, value)` puts the result of `x & value` in `z`. Note: This function is only supported for [Int|Long|Byte]Tensors ### [res] torch.cbitand([res,] tensor1, tensor2) ### Performs bitwise `and` operation of each element in `tensor1` by each element in `tensor2`. The number of elements must match, but sizes do not matter. ```lua > x = torch.LongTensor(4):fill(6) > y = torch.LongTensor{1, 2, 4, 8} > x:cbitand(y) > x 0 2 4 0 [torch.LongTensor of size 4] ``` `z = torch.cbitand(x, y)` returns a new `Tensor`. `torch.cbitand(z, x, y)` puts the result in `z`. `y:cbitand(x)` performs bitwise `and` all elements of `y` with corresponding elements of `x`. `z:cbitand(x, y)` puts the result in `z`. Note: This function is only supported for [Int|Long|Byte]Tensors ### [res] torch.bitor([res,] tensor, value) ### Performs bitwise `or` operation on all elements in the `Tensor` by the given `value`. `z = torch.bitor(x, value)` will return a new `Tensor` with the result of `x & value`. `torch.bitor(z, x, value)` will put the result of `x | value` in `z`. `x:bitor(value)` will perform right shift operation all elements of `x` by `value` bits. `z:bitor(x, value)` puts the result of `x | value` in `z`. Note: This function is only supported for [Int|Long|Byte]Tensors ### [res] torch.cbitor([res,] tensor1, tensor2) ### Performs bitwise `or` operation of each element in `tensor1` by each element in `tensor2`. The number of elements must match, but sizes do not matter. ```lua > x = torch.LongTensor(4):fill(3) > y = torch.LongTensor{1, 2, 4, 8} > x:cbitor(y) > x 3 3 7 11 [torch.LongTensor of size 4] ``` `z = torch.cbitor(x, y)` returns a new `Tensor`. `torch.cbitor(z, x, y)` puts the result in `z`. 
`y:cbitor(x)` performs bitwise `or` all elements of `y` with corresponding elements of `x`. `z:cbitor(x, y)` puts the result in `z`. Note: This function is only supported for [Int|Long|Byte]Tensors ### [res] torch.bitxor([res,] tensor, value) ### Performs bitwise `xor` operation on all elements in the `Tensor` by the given `value`. `z = torch.bitxor(x, value)` will return a new `Tensor` with the result of `x & value`. `torch.bitxor(z, x, value)` will put the result of `x ^ value` in `z`. `x:bitxor(value)` will perform right shift operation all elements of `x` by `value` bits. `z:bitxor(x, value)` puts the result of `x ^ value` in `z`. Note: This function is only supported for [Int|Long|Byte]Tensors ### [res] torch.cbitxor([res,] tensor1, tensor2) ### Performs bitwise `xor` operation of each element in `tensor1` by each element in `tensor2`. The number of elements must match, but sizes do not matter. ```lua > x = torch.LongTensor(4):fill(15) > y = torch.LongTensor{1, 2, 4, 8} > x:cbitxor(y) > x 14 13 11 7 [torch.LongTensor of size 4] ``` `z = torch.cbitxor(x, y)` returns a new `Tensor`. `torch.cbitxor(z, x, y)` puts the result in `z`. `y:cbitxor(x)` performs bitwise `xor` all elements of `y` with corresponding elements of `x`. `z:cbitxor(x, y)` puts the result in `z`. Note: This function is only supported for [Int|Long|Byte]Tensors ### [number] torch.dot(tensor1, tensor2) ### Performs the dot product between `tensor1` and `tensor2`. The number of elements must match: both `Tensor`s are seen as a 1D vector. ```lua > x = torch.Tensor(2, 2):fill(2) > y = torch.Tensor(4):fill(3) > x:dot(y) 24 ``` `torch.dot(x, y)` returns dot product of `x` and `y`. `x:dot(y)` returns dot product of `x` and `y`. ### [res] torch.addmv([res,] [v1,] vec1, [v2,] mat, vec2) ### Performs a matrix-vector multiplication between `mat` (2D `Tensor`) and `vec2` (1D `Tensor`) and add it to `vec1`. Optional values `v1` and `v2` are scalars that multiply `vec1` and `vec2` respectively. 
In other words, ``` res = (v1 * vec1) + (v2 * (mat * vec2)) ``` Sizes must respect the matrix-multiplication operation: if `mat` is a `n × m` matrix, `vec2` must be vector of size `m` and `vec1` must be a vector of size `n`. ```lua > x = torch.Tensor(3):fill(0) > M = torch.Tensor(3, 2):fill(3) > y = torch.Tensor(2):fill(2) > x:addmv(M, y) > x 12 12 12 [torch.DoubleTensor of size 3] ``` `torch.addmv(x, y, z)` returns a new `Tensor` with the result. `torch.addmv(r, x, y, z)` puts the result in `r`. **Differences when used as a method** `x:addmv(y, z)` does `x = x + y * z` `r:addmv(x, y, z)` does `r = x + y * z` if x is a vector `r:addmv(s, y, z)` does `r = r + s * y * z` if `s` is a scalar. `r:addmv(x, s, y, z)` does `r = x + s * y * z` if `s` is a scalar and `x` is a vector. `r:addmv(s1, s2, y, z)` does `r = s1 * r + s2 * y * z` if `s1` and `s2` are scalars. The last example does not accurately fit into the function signature, and needs a special mention. It changes the function signature to: `[vec1] = vec1:addmv([v1,] [v2,] mat, vec2)` ### [res] torch.addr([res,] [v1,] mat, [v2,] vec1, vec2) ### Performs the outer-product between `vec1` (1D `Tensor`) and `vec2` (1D `Tensor`). Optional values `v1` and `v2` are scalars that multiply `mat` and `vec1 [out] vec2` respectively. In other words, ``` res_ij = (v1 * mat_ij) + (v2 * vec1_i * vec2_j) ``` If `vec1` is a vector of size `n` and `vec2` is a vector of size `m`, then `mat` must be a matrix of size `n × m`. ```lua > x = torch.range(1, 3) > y = torch.range(1, 2) > M = torch.Tensor(3, 2):zero() > M:addr(x, y) 1 2 -- |0 0| |1 2| 2 4 -- = 1*|0 0| + 1*|2 4| 3 6 -- |0 0| |3 6| [torch.DoubleTensor of size 3x2] -- default values of v1 and v2 are 1. 
> M:addr(2, 1, x, y) 3 6 -- |1 2| |1 2| 6 12 -- = 2*|2 4| + 1*|2 4| 9 18 -- |3 6| |3 6| [torch.DoubleTensor of size 3x2] > A = torch.range(1, 6):resize(3, 2) > A 1 2 3 4 5 6 [torch.DoubleTensor of size 3x2] > M:addr(2, A, 1, x, y) 3 6 -- |1 2| |1 2| 8 12 -- 2*|3 4| + 1*|2 4| 13 18 -- |5 6| |3 6| [torch.DoubleTensor of size 3x2] ``` `torch.addr(M, x, y)` returns the result in a new `Tensor`. `torch.addr(r, M, x, y)` puts the result in `r`. `M:addr(x, y)` puts the result in `M`. `r:addr(M, x, y)` puts the result in `r`. ### [res] torch.addmm([res,] [v1,] M, [v2,] mat1, mat2) ### Performs a matrix-matrix multiplication between `mat1` (2D `Tensor`) and `mat2` (2D `Tensor`). Optional values `v1` and `v2` are scalars that multiply `M` and `mat1 * mat2` respectively. In other words, ``` res = (v1 * M) + (v2 * mat1 * mat2) ``` If `mat1` is a `n × m` matrix, `mat2` a `m × p` matrix, `M` must be a `n × p` matrix. `torch.addmm(M, mat1, mat2)` returns the result in a new `Tensor`. `torch.addmm(r, M, mat1, mat2)` puts the result in `r`. **Differences when used as a method** `M:addmm(mat1, mat2)` does `M = M + mat1 * mat2`. `r:addmm(M, mat1, mat2)` does `r = M + mat1 * mat2`. `r:addmm(v1, M, v2, mat1, mat2)` does `r = (v1 * M) + (v2 * mat1 * mat2)`. `M:addmm(v1, v2, mat1, mat2)` does `M = (v1 * M) + (v2 * mat1 * mat2)`. The last example does not accurately fit into the function signature, and needs a special mention. It changes the function signature to: `[M] = M:addmm([v1,] [v2,] mat1, mat2)` ### [res] torch.addbmm([res,] [v1,] M, [v2,] batch1, batch2) ### Batch matrix matrix product of matrices stored in `batch1` and `batch2`, with a reduced add step (all matrix multiplications get accumulated in a single place). `batch1` and `batch2` must be 3D `Tensor`s each containing the same number of matrices. If `batch1` is a `b × n × m` `Tensor`, `batch2` a `b × m × p` `Tensor`, res will be a `n × p` `Tensor`. 
In other words, ``` res = (v1 * M) + (v2 * sum(batch1_i * batch2_i, i = 1, b)) ``` `torch.addbmm(M, x, y)` puts the result in a new `Tensor`. `M:addbmm(x, y)` puts the result in `M`, resizing `M` if necessary. `M:addbmm(beta, M2, alpha, x, y)` puts the result in `M`, resizing `M` if necessary. ### [res] torch.baddbmm([res,] [v1,] M, [v2,] batch1, batch2) ### Batch matrix matrix product of matrices stored in `batch1` and `batch2`, with batch add. `batch1` and `batch2` must be 3D `Tensor`s each containing the same number of matrices. If `batch1` is a `b × n × m` `Tensor`, `batch2` a `b × m × p` `Tensor`, res will be a `b × n × p` `Tensor`. In other words, ``` res_i = (v1 * M_i) + (v2 * batch1_i * batch2_i) ``` `torch.baddbmm(M, x, y)` puts the result in a new `Tensor`. `M:baddbmm(x, y)` puts the result in `M`, resizing `M` if necessary. `M:baddbmm(beta, M2, alpha, x, y)` puts the result in `M`, resizing `M` if necessary. ### [res] torch.mv([res,] mat, vec) ### Matrix vector product of `mat` and `vec`. Sizes must respect the matrix-multiplication operation: if `mat` is a `n × m` matrix, `vec` must be vector of size `m` and `res` must be a vector of size `n`. `torch.mv(x, y)` puts the result in a new `Tensor`. `torch.mv(M, x, y)` puts the result in `M`. `M:mv(x, y)` puts the result in `M`. ### [res] torch.mm([res,] mat1, mat2) ### Matrix matrix product of `mat1` and `mat2`. If `mat1` is a `n × m` matrix, `mat2` a `m × p` matrix, `res` must be a `n × p` matrix. `torch.mm(x, y)` puts the result in a new `Tensor`. `torch.mm(M, x, y)` puts the result in `M`. `M:mm(x, y)` puts the result in `M`. ### [res] torch.bmm([res,] batch1, batch2) ### Batch matrix matrix product of matrices stored in `batch1` and `batch2`. `batch1` and `batch2` must be 3D `Tensor`s each containing the same number of matrices. If `batch1` is a `b × n × m` `Tensor`, `batch2` a `b × m × p` `Tensor`, `res` will be a `b × n × p` `Tensor`. `torch.bmm(x, y)` puts the result in a new `Tensor`. 
`torch.bmm(M, x, y)` puts the result in `M`, resizing `M` if necessary. `M:bmm(x, y)` puts the result in `M`, resizing `M` if necessary. ### [res] torch.ger([res,] vec1, vec2) ### Outer product of `vec1` and `vec2`. If `vec1` is a vector of size `n` and `vec2` is a vector of size `m`, then `res` must be a matrix of size `n × m`. `torch.ger(x, y)` puts the result in a new `Tensor`. `torch.ger(M, x, y)` puts the result in `M`. `M:ger(x, y)` puts the result in `M`. ### [res] torch.lerp([res,] a, b, weight) ### Linear interpolation of two scalars or tensors based on a weight: `res = a + weight * (b - a)` `torch.lerp(a, b, weight)` puts the result in a new `Tensor` if `a` and `b` are tensors. If `a` and `b` are scalars the functions returns a number. `torch.lerp(M, a, b, weight)` puts the result in `M`. `M:lerp(a, b, weight)` puts the result in `M`. ## Overloaded operators ## It is possible to use basic mathematical operators like `+`, `-`, `/`, `*` and `%` with `Tensor`s. These operators are provided as a convenience. While they might be handy, they create and return a new `Tensor` containing the results. They are thus not as fast as the operations available in the [previous section](#torch.BasicOperations.dok). Another important point to note is that these operators are only overloaded when the first operand is a `Tensor`. For example, this will NOT work: ```lua > x = 5 + torch.rand(3) ``` ### Addition and subtraction ### You can add a `Tensor` to another one with the `+` operator. Subtraction is done with `-`. The number of elements in the `Tensor`s must match, but the sizes do not matter. The size of the returned `Tensor` will be the size of the first `Tensor`. ```lua > x = torch.Tensor(2, 2):fill(2) > y = torch.Tensor(4):fill(3) > = x + y 5 5 5 5 [torch.DoubleTensor of size 2x2] > = y - x 1 1 1 1 [torch.DoubleTensor of size 4] ``` A scalar might also be added or subtracted to a `Tensor`. The scalar needs to be on the right of the operator. 
```lua > x = torch.Tensor(2, 2):fill(2) > = x + 3 5 5 5 5 [torch.DoubleTensor of size 2x2] ``` ### Negation ### A `Tensor` can be negated with the `-` operator placed in front: ```lua > x = torch.Tensor(2, 2):fill(2) > = -x -2 -2 -2 -2 [torch.DoubleTensor of size 2x2] ``` ### Multiplication ### Multiplication between two `Tensor`s is supported with the `*` operator. The result of the multiplication depends on the sizes of the `Tensor`s. - 1D and 1D: Returns the dot product between the two `Tensor`s (scalar). - 2D and 1D: Returns the matrix-vector operation between the two `Tensor`s (1D `Tensor`). - 2D and 2D: Returns the matrix-matrix operation between the two `Tensor`s (2D `Tensor`). Sizes must be conformant for the corresponding operation. A `Tensor` might also be multiplied by a scalar. The scalar might be on the right or left of the operator. Examples: ```lua > M = torch.Tensor(2, 2):fill(2) > N = torch.Tensor(2, 4):fill(3) > x = torch.Tensor(2):fill(4) > y = torch.Tensor(2):fill(5) > = x * y -- dot product 40 > = M * x -- matrix-vector 16 16 [torch.DoubleTensor of size 2] > = M * N -- matrix-matrix 12 12 12 12 12 12 12 12 [torch.DoubleTensor of size 2x4] ``` ### Division and Modulo (remainder) ### Only the division of a `Tensor` by a scalar is supported with the operator `/`. Example: ```lua > x = torch.Tensor(2, 2):fill(2) > = x/3 0.6667 0.6667 0.6667 0.6667 [torch.DoubleTensor of size 2x2] ``` Similarly, the remainder of the division of a `Tensor`'s elements by a scalar can be obtained with the operator `%`. Example: ```lua > x = torch.Tensor{{1,2},{3,4}} > = x % 3 1 2 0 1 [torch.Tensor of size 2x2] ``` ## Column or row-wise operations (dimension-wise operations) ## ### [res] torch.cross([res,] a, b [,n]) ### `y = torch.cross(a, b)` returns the cross product of `a` and `b` along the first dimension of length 3. `y = torch.cross(a, b, n)` returns the cross product of vectors in dimension `n` of `a` and `b`.
`a` and `b` must have the same size, and both `a:size(n)` and `b:size(n)` must be 3. ### [res] torch.cumprod([res,] x [,dim]) ### `y = torch.cumprod(x)` returns the cumulative product of the elements of `x`, performing the operation over the last dimension. `y = torch.cumprod(x, n)` returns the cumulative product of the elements of `x`, performing the operation over dimension `n`. ```lua -- 1. cumulative product for a vector > A = torch.range(1, 5) > A 1 2 3 4 5 [torch.DoubleTensor of size 5] > B = torch.cumprod(A) > B 1 -- B(1) = A(1) = 1 2 -- B(2) = A(1)*A(2) = 1*2 = 2 6 -- B(3) = A(1)*A(2)*A(3) = 1*2*3 = 6 24 -- B(4) = A(1)*A(2)*A(3)*A(4) = 1*2*3*4 = 24 120 -- B(5) = A(1)*A(2)*A(3)*A(4)*A(5) =1*2*3*4*5 = 120 [torch.DoubleTensor of size 5] -- 2. cumulative product for a matrix > A = torch.LongTensor{{1, 4, 7}, {2, 5, 8}, {3, 6, 9}} > A 1 4 7 2 5 8 3 6 9 [torch.LongTensor of size 3x3] > B = torch.cumprod(A) > B 1 4 7 2 20 56 6 120 504 [torch.LongTensor of size 3x3] -- Why? -- B(1, 1) = A(1, 1) = 1 -- B(2, 1) = A(1, 1)*A(2, 1) = 1*2 = 2 -- B(3, 1) = A(1, 1)*A(2, 1)*A(3, 1) = 1*2*3 = 6 -- B(1, 2) = A(1, 2) = 4 -- B(2, 2) = A(1, 2)*A(2, 2) = 4*5 = 20 -- B(3, 2) = A(1, 2)*A(2, 2)*A(3, 2) = 4*5*6 = 120 -- B(1, 3) = A(1, 3) = 7 -- B(2, 3) = A(1, 3)*A(2, 3) = 7*8 = 56 -- B(3, 3) = A(1, 3)*A(2, 3)*A(3, 3) = 7*8*9 = 504 -- 3. cumulative product along 2-dim > B = torch.cumprod(A, 2) > B 1 4 28 2 10 80 3 18 162 [torch.LongTensor of size 3x3] -- Why? -- B(1, 1) = A(1, 1) = 1 -- B(1, 2) = A(1, 1)*A(1, 2) = 1*4 = 4 -- B(1, 3) = A(1, 1)*A(1, 2)*A(1, 3) = 1*4*7 = 28 -- B(2, 1) = A(2, 1) = 2 -- B(2, 2) = A(2, 1)*A(2, 2) = 2*5 = 10 -- B(2, 3) = A(2, 1)*A(2, 2)*A(2, 3) = 2*5*8 = 80 -- B(3, 1) = A(3, 1) = 3 -- B(3, 2) = A(3, 1)*A(3, 2) = 3*6 = 18 -- B(3, 3) = A(3, 1)*A(3, 2)*A(3, 3) = 3*6*9 = 162 ``` ### [res] torch.cumsum([res,] x [,dim]) ### `y = torch.cumsum(x)` returns the cumulative sum of the elements of `x`, performing the operation over the first dimension.
`y = torch.cumsum(x, n)` returns the cumulative sum of the elements of `x`, performing the operation over dimension `n`. ### torch.max([resval, resind,] x [,dim]) ### `y = torch.max(x)` returns the single largest element of `x`. `y, i = torch.max(x, 1)` returns the largest element in each column (across rows) of `x`, and a `Tensor` `i` of their corresponding indices in `x`. `y, i = torch.max(x, 2)` performs the max operation for each row. `y, i = torch.max(x, n)` performs the max operation over the dimension `n`. ```lua > x = torch.randn(3, 3) > x 1.1994 -0.6290 0.6888 -0.0038 -0.0908 -0.2075 0.3437 -0.9948 0.1216 [torch.DoubleTensor of size 3x3] > torch.max(x) 1.1993977428735 > torch.max(x, 1) 1.1994 -0.0908 0.6888 [torch.DoubleTensor of size 1x3] 1 2 1 [torch.LongTensor of size 1x3] > torch.max(x, 2) 1.1994 -0.0038 0.3437 [torch.DoubleTensor of size 3x1] 1 1 1 [torch.LongTensor of size 3x1] ``` ### [res] torch.mean([res,] x [,dim]) ### `y = torch.mean(x)` returns the mean of all elements of `x`. `y = torch.mean(x, 1)` returns a `Tensor` `y` of the mean of the elements in each column of `x`. `y = torch.mean(x, 2)` performs the mean operation for each row. `y = torch.mean(x, n)` performs the mean operation over the dimension `n`. ### torch.min([resval, resind,] x [,dim]) ### `y = torch.min(x)` returns the single smallest element of `x`. `y, i = torch.min(x, 1)` returns the smallest element in each column (across rows) of `x`, and a `Tensor` `i` of their corresponding indices in `x`. `y, i = torch.min(x, 2)` performs the min operation for each row. `y, i = torch.min(x, n)` performs the min operation over the dimension `n`. ### [res] torch.cmax([res,] tensor1, tensor2) ### Compute the maximum of each pair of values in `tensor1` and `tensor2`. `c = torch.cmax(a, b)` returns a new `Tensor` containing the element-wise maximum of `a` and `b`. `a:cmax(b)` stores the element-wise maximum of `a` and `b` in `a`. 
`c:cmax(a, b)` stores the element-wise maximum of `a` and `b` in `c`. ```lua > a = torch.Tensor{1, 2, 3} > b = torch.Tensor{3, 2, 1} > torch.cmax(a, b) 3 2 3 [torch.DoubleTensor of size 3] ``` ### [res] torch.cmax([res,] tensor, value) ### Compute the maximum between each value in `tensor` and `value`. `c = torch.cmax(a, v)` returns a new `Tensor` containing the maxima of each element in `a` and `v`. `a:cmax(v)` stores the maxima of each element in `a` and `v` in `a`. `c:cmax(a, v)` stores the maxima of each element in `a` and `v` in `c`. ```lua > a = torch.Tensor{1, 2, 3} > torch.cmax(a, 2) 2 2 3 [torch.DoubleTensor of size 3] ``` ### [res] torch.cmin([res,] tensor1, tensor2) ### Compute the minimum of each pair of values in `tensor1` and `tensor2`. `c = torch.cmin(a, b)` returns a new `Tensor` containing the element-wise minimum of `a` and `b`. `a:cmin(b)` stores the element-wise minimum of `a` and `b` in `a`. `c:cmin(a, b)` stores the element-wise minimum of `a` and `b` in `c`. ```lua > a = torch.Tensor{1, 2, 3} > b = torch.Tensor{3, 2, 1} > torch.cmin(a, b) 1 2 1 [torch.DoubleTensor of size 3] ``` ### [res] torch.cmin([res,] tensor, value) ### Compute the minimum between each value in `tensor` and `value`. `c = torch.cmin(a, v)` returns a new `Tensor` containing the minima of each element in `a` and `v`. `a:cmin(v)` stores the minima of each element in `a` and `v` in `a`. `c:cmin(a, v)` stores the minima of each element in `a` and `v` in `c`. ```lua > a = torch.Tensor{1, 2, 3} > torch.cmin(a, 2) 1 2 2 [torch.DoubleTensor of size 3] ``` ### torch.median([resval, resind,] x [,dim]) ### `y = torch.median(x)` performs the median operation over the last dimension of `x` (one-before-middle in the case of an even number of elements). `y, i = torch.median(x, 1)` returns the median element in each column (across rows) of `x`, and a `Tensor` `i` of their corresponding indices in `x`. `y, i = torch.median(x, 2)` performs the median operation for each row. 
`y, i = torch.median(x, n)` performs the median operation over the dimension `n`. ```lua > x = torch.randn(3, 3) > x 0.7860 0.7687 -0.9362 0.0411 0.5407 -0.3616 -0.0129 -0.2499 -0.5786 [torch.DoubleTensor of size 3x3] > y, i = torch.median(x) > y 0.7687 0.0411 -0.2499 [torch.DoubleTensor of size 3x1] > i 2 1 2 [torch.LongTensor of size 3x1] > y, i = torch.median(x, 1) > y 0.0411 0.5407 -0.5786 [torch.DoubleTensor of size 1x3] > i 2 2 3 [torch.LongTensor of size 1x3] > y, i = torch.median(x, 2) > y 0.7687 0.0411 -0.2499 [torch.DoubleTensor of size 3x1] > i 2 1 2 [torch.LongTensor of size 3x1] ``` ### torch.mode([resval, resind,] x [,dim]) ### `y = torch.mode(x)` returns the most frequent element of `x` over its last dimension. `y, i = torch.mode(x, 1)` returns the mode element in each column (across rows) of `x`, and a `Tensor` `i` of their corresponding indices in `x`. `y, i = torch.mode(x, 2)` performs the mode operation for each row. `y, i = torch.mode(x, n)` performs the mode operation over the dimension `n`. ### torch.kthvalue([resval, resind,] x, k [,dim]) ### `y = torch.kthvalue(x, k)` returns the `k`-th smallest element of `x` over its last dimension. `y, i = torch.kthvalue(x, k, 1)` returns the `k`-th smallest element in each column (across rows) of `x`, and a `Tensor` `i` of their corresponding indices in `x`. `y, i = torch.kthvalue(x, k, 2)` performs the `k`-th value operation for each row. `y, i = torch.kthvalue(x, k, n)` performs the `k`-th value operation over the dimension `n`. ### [res] torch.prod([res,] x [,n]) ### `y = torch.prod(x)` returns the product of all elements in `x`. `y = torch.prod(x, n)` returns a `Tensor` `y` whose size in dimension `n` is 1 and where elements are the product of elements of `x` with respect to dimension `n`. ```lua > a = torch.Tensor{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}} > a (1,.,.) = 1 2 3 4 (2,.,.) = 5 6 7 8 [torch.DoubleTensor of dimension 2x2x2] > torch.prod(a, 1) (1,.,.)
= 5 12 21 32 [torch.DoubleTensor of dimension 1x2x2] > torch.prod(a, 2) (1,.,.) = 3 8 (2,.,.) = 35 48 [torch.DoubleTensor of size 2x1x2] > torch.prod(a, 3) (1,.,.) = 2 12 (2,.,.) = 30 56 [torch.DoubleTensor of size 2x2x1] ``` ### torch.sort([resval, resind,] x [,d] [,flag]) ### `y, i = torch.sort(x)` returns a `Tensor` `y` where all entries are sorted along the last dimension, in **ascending** order. It also returns a `Tensor` `i` that provides the corresponding indices from `x`. `y, i = torch.sort(x, d)` performs the sort operation along a specific dimension `d`. `y, i = torch.sort(x)` is therefore equivalent to `y, i = torch.sort(x, x:dim())` `y, i = torch.sort(x, d, true)` performs the sort operation along a specific dimension `d`, in **descending** order. ```lua > x = torch.randn(3, 3) > x -1.2470 -0.4288 -0.5337 0.8836 -0.1622 0.9604 0.6297 0.2397 0.0746 [torch.DoubleTensor of size 3x3] > torch.sort(x) -1.2470 -0.5337 -0.4288 -0.1622 0.8836 0.9604 0.0746 0.2397 0.6297 [torch.DoubleTensor of size 3x3] 1 3 2 2 1 3 3 2 1 [torch.LongTensor of size 3x3] ``` ### torch.topk([resval, resind,] x, k, [,dim] [,dir] [,sort]) ### `y, i = torch.topk(x, k)` returns all `k` smallest elements in `x` over its last dimension including their indices, in unsorted order. `y, i = torch.topk(x, k, dim)` performs the same operation except over dimension `dim`. `y, i = torch.topk(x, k, dim, dir)` adds a sorting direction that has the same sense as `torch.sort`; `false` returns the `k` smallest elements in the slice, `true` returns the `k` largest elements in the slice. `y, i = torch.topk(x, k, dim, dir, true)` specifies that the results in `y` should be sorted with respect to `dir`; by default, the results are potentially unsorted since the computation may be faster, but if sorting is desired, the sort flag may be passed, in which case the results are returned from smallest to `k`-th smallest (`dir == false`) or highest to `k`-th highest (`dir == true`). 
The implementation provides no guarantee of the order of selection (indices) among equivalent elements (e.g., topk `k == 2` selection of a vector `{1, 2, 1, 1}`; the values returned could be any pair of `1` entries in the vector). ### [res] torch.std([res,] x, [,dim] [,flag]) ### `y = torch.std(x)` returns the standard deviation of the elements of `x`. `y = torch.std(x, dim)` performs the `std` operation over the dimension `dim`. `y = torch.std(x, dim, false)` performs the `std` operation normalizing by `n-1` (this is the default). `y = torch.std(x, dim, true)` performs the `std` operation normalizing by `n` instead of `n-1`. ### [res] torch.sum([res,] x) ### `y = torch.sum(x)` returns the sum of the elements of `x`. `y = torch.sum(x, 2)` performs the sum operation for each row. `y = torch.sum(x, n)` performs the sum operation over the dimension `n`. ### [res] torch.var([res,] x [,dim] [,flag]) ### `y = torch.var(x)` returns the variance of the elements of `x`. `y = torch.var(x, dim)` performs the `var` operation over the dimension `dim`. `y = torch.var(x, dim, false)` performs the `var` operation normalizing by `n-1` (this is the default). `y = torch.var(x, dim, true)` performs the `var` operation normalizing by `n` instead of `n-1`. ## Matrix-wide operations (`Tensor`-wide operations) ## Note that many of the operations in [dimension-wise operations](#torch.columnwise.dok) can also be used as matrix-wide operations, by just omitting the `dim` parameter. ### torch.norm(x [,p] [,dim]) ### `y = torch.norm(x)` returns the `2`-norm of the `Tensor` `x`. `y = torch.norm(x, p)` returns the `p`-norm of the `Tensor` `x`. `y = torch.norm(x, p, dim)` returns the `p`-norms of the `Tensor` `x` computed over the dimension `dim`. ### torch.renorm([res], x, p, dim, maxnorm) ### Renormalizes the sub-`Tensor`s along dimension `dim` such that they do not exceed norm `maxnorm`. 
`y = torch.renorm(x, p, dim, maxnorm)` returns a version of `x` with `p`-norms lower than `maxnorm` over non-`dim` dimensions. The `dim` argument is not to be confused with the argument of the same name in function [`norm`](#torch.norm). In this case, the `p`-norm is measured for each `i`-th sub-`Tensor` `x:select(dim, i)`. This function is equivalent to (but faster than) the following: ```lua function renorm(matrix, value, dim, maxnorm) local m1 = matrix:transpose(dim, 1):contiguous() -- collapse non-dim dimensions: m2 = m1:reshape(m1:size(1), m1:nElement()/m1:size(1)) local norms = m2:norm(value, 2) -- clip local new_norms = norms:clone() new_norms[torch.gt(norms, maxnorm)] = maxnorm new_norms:cdiv(norms:add(1e-7)) -- renormalize m1:cmul(new_norms:expandAs(m1)) return m1:transpose(dim, 1) end ``` `x:renorm(p, dim, maxnorm)` returns the equivalent of `x:copy(torch.renorm(x, p, dim, maxnorm))`. Note: this function is particularly useful as a regularizer for constraining the norm of parameter `Tensor`s. See [Hinton et al. 2012, p. 2](http://arxiv.org/pdf/1207.0580.pdf). ### torch.dist(x, y) ### `y = torch.dist(x, y)` returns the `2`-norm of `x - y`. `y = torch.dist(x, y, p)` returns the `p`-norm of `x - y`. ### torch.numel(x) ### `y = torch.numel(x)` returns the count of the number of elements in the matrix `x`. ### torch.trace(x) ### `y = torch.trace(x)` returns the trace (sum of the diagonal elements) of a matrix `x`. This is equal to the sum of the eigenvalues of `x`. The returned value `y` is a number, not a `Tensor`. ## Convolution Operations ## These functions implement convolution or cross-correlation of an input image (or set of input images) with a kernel (or set of kernels). The convolution function in Torch can handle different types of input/kernel dimensions and produces corresponding outputs. The general form of operations always remain the same. 
### [res] torch.conv2([res,] x, k, [, 'F' or 'V']) ### This function computes 2 dimensional convolutions between `x` and `k`. These operations are similar to BLAS operations when number of dimensions of input and kernel are reduced by `2`. * `x` and `k` are 2D: convolution of a single image with a single kernel (2D output). This operation is similar to multiplication of two scalars. * `x` (`p × m × n`) and `k` (`p × ki × kj`) are 3D: convolution of each input slice with corresponding kernel (3D output). * `x` (`p × m × n`) 3D, `k` (`q × p × ki × kj`) 4D: convolution of all input slices with the corresponding slice of kernel. Output is 3D (`q × m × n`). This operation is similar to matrix vector product of matrix `k` and vector `x`. The last argument controls if the convolution is a full (`'F'`) or valid (`'V'`) convolution. The default is **valid** convolution. ```lua x = torch.rand(100, 100) k = torch.rand(10, 10) c = torch.conv2(x, k) > c:size() 91 91 [torch.LongStorage of size 2] c = torch.conv2(x, k, 'F') > c:size() 109 109 [torch.LongStorage of size 2] ``` ### [res] torch.xcorr2([res,] x, k, [, 'F' or 'V']) ### This function operates with same options and input/output configurations as [`torch.conv2`](#torch.conv2), but performs cross-correlation of the input with the kernel `k`. ### [res] torch.conv3([res,] x, k, [, 'F' or 'V']) ### This function computes 3 dimensional convolutions between `x` and `k`. These operations are similar to BLAS operations when number of dimensions of input and kernel are reduced by `3`. * `x` and `k` are 3D: convolution of a single image with a single kernel (3D output). This operation is similar to multiplication of two scalars. * `x` (`p × m × n × o`) and `k` (`p × ki × kj × kk`) are 4D: convolution of each input slice with corresponding kernel (4D output). * `x` (`p × m × n × o`) 4D, `k` (`q × p × ki × kj × kk`) 5D: convolution of all input slices with the corresponding slice of kernel. Output is 4D `q × m × n × o`. 
This operation is similar to matrix vector product of matrix `k` and vector `x`. The last argument controls if the convolution is a full (`'F'`) or valid (`'V'`) convolution. The default is **valid** convolution. ```lua x = torch.rand(100, 100, 100) k = torch.rand(10, 10, 10) c = torch.conv3(x, k) > c:size() 91 91 91 [torch.LongStorage of size 3] c = torch.conv3(x, k, 'F') > c:size() 109 109 109 [torch.LongStorage of size 3] ``` ### [res] torch.xcorr3([res,] x, k, [, 'F' or 'V']) ### This function operates with same options and input/output configurations as [`torch.conv3`](#torch.conv3), but performs cross-correlation of the input with the kernel `k`. ## Eigenvalues, SVD, Linear System Solution ## Functions in this section are implemented with an interface to [LAPACK](https://en.wikipedia.org/wiki/LAPACK) libraries. If LAPACK libraries are not found during compilation step, then these functions will not be available. ### [x, lu] torch.gesv([resb, resa,] B, A) ### `X, LU = torch.gesv(B, A)` returns the solution of `AX = B` and `LU` contains `L` and `U` factors for `LU` factorization of `A`. `A` has to be a square and non-singular matrix (2D `Tensor`). `A` and `LU` are `m × m`, `X` is `m × k` and `B` is `m × k`. If `resb` and `resa` are given, then they will be used for temporary storage and returning the result. * `resa` will contain `L` and `U` factors for `LU` factorization of `A`. * `resb` will contain the solution `X`. Note: Irrespective of the original strides, the returned matrices `resb` and `resa` will be transposed, i.e. with strides `1, m` instead of `m, 1`. 
```lua > a = torch.Tensor({{6.80, -2.11, 5.66, 5.97, 8.23}, {-6.05, -3.30, 5.36, -4.44, 1.08}, {-0.45, 2.58, -2.70, 0.27, 9.04}, {8.32, 2.71, 4.35, -7.17, 2.14}, {-9.67, -5.14, -7.26, 6.08, -6.87}}):t() > b = torch.Tensor({{4.02, 6.19, -8.22, -7.57, -3.03}, {-1.56, 4.00, -8.67, 1.75, 2.86}, {9.81, -4.09, -4.57, -8.61, 8.99}}):t() > b 4.0200 -1.5600 9.8100 6.1900 4.0000 -4.0900 -8.2200 -8.6700 -4.5700 -7.5700 1.7500 -8.6100 -3.0300 2.8600 8.9900 [torch.DoubleTensor of dimension 5x3] > a 6.8000 -6.0500 -0.4500 8.3200 -9.6700 -2.1100 -3.3000 2.5800 2.7100 -5.1400 5.6600 5.3600 -2.7000 4.3500 -7.2600 5.9700 -4.4400 0.2700 -7.1700 6.0800 8.2300 1.0800 9.0400 2.1400 -6.8700 [torch.DoubleTensor of dimension 5x5] > x = torch.gesv(b, a) > x -0.8007 -0.3896 0.9555 -0.6952 -0.5544 0.2207 0.5939 0.8422 1.9006 1.3217 -0.1038 5.3577 0.5658 0.1057 4.0406 [torch.DoubleTensor of dimension 5x3] > b:dist(a * x) 1.1682163181673e-14 ``` ### [x] torch.trtrs([resb, resa,] b, a [, 'U' or 'L'] [, 'N' or 'T'] [, 'N' or 'U']) ### `X = torch.trtrs(B, A)` returns the solution of `AX = B` where `A` is upper-triangular. `A` has to be a square, triangular, non-singular matrix (2D `Tensor`). `A` and `resa` are `m × m`, `X` and `B` are `m × k`. (To be very precise: `A` does not have to be triangular and non-singular, rather only its upper or lower triangle will be taken into account and that part has to be non-singular.) The function has several options: * `uplo` (`'U'` or `'L'`) specifies whether `A` is upper or lower triangular; the default value is `'U'`. * `trans` (`'N'` or `'T`') specifies the system of equations: `'N'` for `A * X = B` (no transpose), or `'T'` for `A^T * X = B` (transpose); the default value is `'N'`. * `diag` (`'N'` or `'U'`) `'U'` specifies that `A` is unit triangular, i.e., it has ones on its diagonal; `'N'` specifies that `A` is not (necessarily) unit triangular; the default value is `'N'`. 
If `resb` and `resa` are given, then they will be used for temporary storage and returning the result. `resb` will contain the solution `X`. Note: Irrespective of the original strides, the returned matrices `resb` and `resa` will be transposed, i.e. with strides `1, m` instead of `m, 1`. ```lua > a = torch.Tensor({{6.80, -2.11, 5.66, 5.97, 8.23}, {0, -3.30, 5.36, -4.44, 1.08}, {0, 0, -2.70, 0.27, 9.04}, {0, 0, 0, -7.17, 2.14}, {0, 0, 0, 0, -6.87}}) > b = torch.Tensor({{4.02, 6.19, -8.22, -7.57, -3.03}, {-1.56, 4.00, -8.67, 1.75, 2.86}, {9.81, -4.09, -4.57, -8.61, 8.99}}):t() > b 4.0200 -1.5600 9.8100 6.1900 4.0000 -4.0900 -8.2200 -8.6700 -4.5700 -7.5700 1.7500 -8.6100 -3.0300 2.8600 8.9900 [torch.DoubleTensor of dimension 5x3] > a 6.8000 -2.1100 5.6600 5.9700 8.2300 0.0000 -3.3000 5.3600 -4.4400 1.0800 0.0000 0.0000 -2.7000 0.2700 9.0400 0.0000 0.0000 0.0000 -7.1700 2.1400 0.0000 0.0000 0.0000 0.0000 -6.8700 [torch.DoubleTensor of dimension 5x5] > x = torch.trtrs(b, a) > x -3.5416 -0.2514 3.0847 4.2072 2.0391 -4.5146 4.6399 1.7804 -2.6077 1.1874 -0.3683 0.8103 0.4410 -0.4163 -1.3086 [torch.DoubleTensor of size 5x3] > b:dist(a*x) 4.1895292266754e-15 ``` ### torch.potrf([res,] A [, 'U' or 'L'] ) ### Cholesky Decomposition of 2D `Tensor` `A`. The matrix `A` has to be a positive-definite and either symmetric or complex Hermitian. The factorization has the form A = U**T * U, if UPLO = 'U', or A = L * L**T, if UPLO = 'L', where `U` is an upper triangular matrix and `L` is lower triangular. The optional character `uplo` = {'U', 'L'} specifies whether the upper or lower triangular decomposition should be returned. By default, `uplo` = 'U'. `U = torch.potrf(A, 'U')` returns the upper triangular Cholesky decomposition of `A`. `L = torch.potrf(A, 'L')` returns the lower triangular Cholesky decomposition of `A`. If `Tensor` `res` is provided, the resulting decomposition will be stored therein.
```lua > A = torch.Tensor({ {1.2705, 0.9971, 0.4948, 0.1389, 0.2381}, {0.9971, 0.9966, 0.6752, 0.0686, 0.1196}, {0.4948, 0.6752, 1.1434, 0.0314, 0.0582}, {0.1389, 0.0686, 0.0314, 0.0270, 0.0526}, {0.2381, 0.1196, 0.0582, 0.0526, 0.3957}}) > chol = torch.potrf(A) > chol 1.1272 0.8846 0.4390 0.1232 0.2112 0.0000 0.4626 0.6200 -0.0874 -0.1453 0.0000 0.0000 0.7525 0.0419 0.0738 0.0000 0.0000 0.0000 0.0491 0.2199 0.0000 0.0000 0.0000 0.0000 0.5255 [torch.DoubleTensor of size 5x5] > torch.potrf(chol, A, 'L') > chol 1.1272 0.0000 0.0000 0.0000 0.0000 0.8846 0.4626 0.0000 0.0000 0.0000 0.4390 0.6200 0.7525 0.0000 0.0000 0.1232 -0.0874 0.0419 0.0491 0.0000 0.2112 -0.1453 0.0738 0.2199 0.5255 [torch.DoubleTensor of size 5x5] ``` ### torch.pstrf([res, piv, ] A [, 'U' or 'L'] ) ### Cholesky factorization with complete pivoting of a real symmetric positive semidefinite 2D `Tensor` `A`. The matrix `A` has to be a positive semi-definite and symmetric. The factorization has the form P**T * A * P = U**T * U , if UPLO = 'U', P**T * A * P = L * L**T, if UPLO = 'L', where `U` is an upper triangular matrix and `L` is lower triangular, and `P` is stored as the vector `piv`. More specifically, `piv` is such that the nonzero entries are `P[piv[k], k] = 1`. The optional character argument `uplo` = {'U', 'L'} specifies whether the upper or lower triangular decomposition should be returned. By default, `uplo` = 'U'. `U, piv = torch.pstrf(A, 'U')` returns the upper triangular Cholesky decomposition of `A`. `L, piv = torch.pstrf(A, 'L')` returns the lower triangular Cholesky decomposition of `A`. If tensors `res` and `piv` (an `IntTensor`) are provided, the resulting decomposition will be stored therein.
```lua > A = torch.Tensor({ {1.2705, 0.9971, 0.4948, 0.1389, 0.2381}, {0.9971, 0.9966, 0.6752, 0.0686, 0.1196}, {0.4948, 0.6752, 1.1434, 0.0314, 0.0582}, {0.1389, 0.0686, 0.0314, 0.0270, 0.0526}, {0.2381, 0.1196, 0.0582, 0.0526, 0.3957}}) > U, piv = torch.pstrf(A) > U 1.1272 0.4390 0.2112 0.8846 0.1232 0.0000 0.9750 -0.0354 0.2942 -0.0233 0.0000 0.0000 0.5915 -0.0961 0.0435 0.0000 0.0000 0.0000 0.3439 -0.0854 0.0000 0.0000 0.0000 0.0000 0.0456 [torch.DoubleTensor of size 5x5] > piv 1 3 5 2 4 [torch.IntTensor of size 5] > Ap = U:t() * U > Ap 1.2705 0.4948 0.2381 0.9971 0.1389 0.4948 1.1434 0.0582 0.6752 0.0314 0.2381 0.0582 0.3957 0.1196 0.0526 0.9971 0.6752 0.1196 0.9966 0.0686 0.1389 0.0314 0.0526 0.0686 0.0270 [torch.DoubleTensor of size 5x5] > -- Permute rows and columns > Ap:indexCopy(1, piv:long(), Ap:clone()) > Ap:indexCopy(2, piv:long(), Ap:clone()) > (Ap - A):norm() 1.5731560566382e-16 ``` ### torch.potrs([res,] B, chol [, 'U' or 'L'] ) ### Returns the solution to linear system `AX = B` using the Cholesky decomposition `chol` of 2D `Tensor` `A`. Square matrix `chol` should be triangular; and, righthand side matrix `B` should be of full rank. Optional character `uplo` = {'U', 'L'} specifies matrix `chol` as either upper or lower triangular; and, by default, equals 'U'. If `Tensor` `res` is provided, the resulting decomposition will be stored therein. 
```lua > A = torch.Tensor({ {1.2705, 0.9971, 0.4948, 0.1389, 0.2381}, {0.9971, 0.9966, 0.6752, 0.0686, 0.1196}, {0.4948, 0.6752, 1.1434, 0.0314, 0.0582}, {0.1389, 0.0686, 0.0314, 0.0270, 0.0526}, {0.2381, 0.1196, 0.0582, 0.0526, 0.3957}}) > B = torch.Tensor({ {0.6219, 0.3439, 0.0431}, {0.5642, 0.1756, 0.0153}, {0.2334, 0.8594, 0.4103}, {0.7556, 0.1966, 0.9637}, {0.1420, 0.7185, 0.7476}}) > chol = torch.potrf(A) > chol 1.1272 0.8846 0.4390 0.1232 0.2112 0.0000 0.4626 0.6200 -0.0874 -0.1453 0.0000 0.0000 0.7525 0.0419 0.0738 0.0000 0.0000 0.0000 0.0491 0.2199 0.0000 0.0000 0.0000 0.0000 0.5255 [torch.DoubleTensor of size 5x5] > solve = torch.potrs(B, chol) > solve 12.1945 61.8622 92.6882 -11.1782 -97.0303 -138.4874 -15.3442 -76.6562 -116.8218 6.1930 13.5238 25.2056 29.9678 251.7346 360.2301 [torch.DoubleTensor of size 5x3] > A*solve 0.6219 0.3439 0.0431 0.5642 0.1756 0.0153 0.2334 0.8594 0.4103 0.7556 0.1966 0.9637 0.1420 0.7185 0.7476 [torch.DoubleTensor of size 5x3] > B:dist(A*solve) 4.6783066076306e-14 ``` ### torch.potri([res,] chol [, 'U' or 'L'] ) ### Returns the inverse of 2D `Tensor` `A` given its Cholesky decomposition `chol`. Square matrix `chol` should be triangular. Optional character `uplo` = {'U', 'L'} specifies matrix `chol` as either upper or lower triangular; and, by default, equals 'U'. If `Tensor` `res` is provided, the resulting inverse will be stored therein. 
```lua > A = torch.Tensor({ {1.2705, 0.9971, 0.4948, 0.1389, 0.2381}, {0.9971, 0.9966, 0.6752, 0.0686, 0.1196}, {0.4948, 0.6752, 1.1434, 0.0314, 0.0582}, {0.1389, 0.0686, 0.0314, 0.0270, 0.0526}, {0.2381, 0.1196, 0.0582, 0.0526, 0.3957}}) > chol = torch.potrf(A) > chol 1.1272 0.8846 0.4390 0.1232 0.2112 0.0000 0.4626 0.6200 -0.0874 -0.1453 0.0000 0.0000 0.7525 0.0419 0.0738 0.0000 0.0000 0.0000 0.0491 0.2199 0.0000 0.0000 0.0000 0.0000 0.5255 [torch.DoubleTensor of size 5x5] > inv = torch.potri(chol) > inv 42.2781 -39.0824 8.3019 -133.4998 2.8980 -39.0824 38.1222 -8.7468 119.4247 -2.5944 8.3019 -8.7468 3.1104 -25.1405 0.5327 -133.4998 119.4247 -25.1405 480.7511 -15.9747 2.8980 -2.5944 0.5327 -15.9747 3.6127 [torch.DoubleTensor of size 5x5] > inv:dist(torch.inverse(A)) 2.8525852877633e-12 ``` ### torch.gels([resb, resa,] b, a) ### Solution of least squares and least norm problems for a full rank `m × n` matrix `A`. * If `n ≤ m`, then solve `||AX-B||_F`. * If `n > m` , then solve `min ||X||_F` s.t. `AX = B`. On return, first `n` rows of `x` matrix contains the solution and the rest contains residual information. Square root of sum squares of elements of each column of `x` starting at row `n + 1` is the residual for corresponding column. Note: Irrespective of the original strides, the returned matrices `resb` and `resa` will be transposed, i.e. with strides `1, m` instead of `m, 1`. 
```lua > a = torch.Tensor({{ 1.44, -9.96, -7.55, 8.34, 7.08, -5.45}, {-7.84, -0.28, 3.24, 8.09, 2.52, -5.70}, {-4.39, -3.24, 6.27, 5.28, 0.74, -1.19}, {4.53, 3.83, -6.64, 2.06, -2.47, 4.70}}):t() > b = torch.Tensor({{8.58, 8.26, 8.48, -5.28, 5.72, 8.93}, {9.35, -4.43, -0.70, -0.26, -7.36, -2.52}}):t() > a 1.4400 -7.8400 -4.3900 4.5300 -9.9600 -0.2800 -3.2400 3.8300 -7.5500 3.2400 6.2700 -6.6400 8.3400 8.0900 5.2800 2.0600 7.0800 2.5200 0.7400 -2.4700 -5.4500 -5.7000 -1.1900 4.7000 [torch.DoubleTensor of dimension 6x4] > b 8.5800 9.3500 8.2600 -4.4300 8.4800 -0.7000 -5.2800 -0.2600 5.7200 -7.3600 8.9300 -2.5200 [torch.DoubleTensor of dimension 6x2] > x = torch.gels(b, a) > x -0.4506 0.2497 -0.8492 -0.9020 0.7066 0.6323 0.1289 0.1351 13.1193 -7.4922 -4.8214 -7.1361 [torch.DoubleTensor of dimension 6x2] > b:dist(a*x:narrow(1, 1, 4)) 17.390200628863 > math.sqrt(x:narrow(1, 5, 2):pow(2):sumall()) 17.390200628863 ``` ### torch.symeig([rese, resv,] a [, 'N' or 'V'] [, 'U' or 'L']) ### `e, V = torch.symeig(A)` returns eigenvalues and eigenvectors of a symmetric real matrix `A`. `A` and `V` are `m × m` matrices and `e` is a `m` dimensional vector. This function calculates all eigenvalues (and vectors) of `A` such that `A = V diag(e) V'`. Third argument defines computation of eigenvectors or eigenvalues only. If it is `'N'`, only eigenvalues are computed. If it is `'V'`, both eigenvalues and eigenvectors are computed. Since the input matrix `A` is supposed to be symmetric, only upper triangular portion is used by default. If the 4th argument is `'L'`, then lower triangular portion is used. Note: Irrespective of the original strides, the returned matrix `V` will be transposed, i.e. with strides `1, m` instead of `m, 1`. 
```lua > a = torch.Tensor({{ 1.96, 0.00, 0.00, 0.00, 0.00}, {-6.49, 3.80, 0.00, 0.00, 0.00}, {-0.47, -6.39, 4.17, 0.00, 0.00}, {-7.20, 1.50, -1.51, 5.70, 0.00}, {-0.65, -6.34, 2.67, 1.80, -7.10}}):t() > a 1.9600 -6.4900 -0.4700 -7.2000 -0.6500 0.0000 3.8000 -6.3900 1.5000 -6.3400 0.0000 0.0000 4.1700 -1.5100 2.6700 0.0000 0.0000 0.0000 5.7000 1.8000 0.0000 0.0000 0.0000 0.0000 -7.1000 [torch.DoubleTensor of dimension 5x5] > e = torch.symeig(a) > e -11.0656 -6.2287 0.8640 8.8655 16.0948 [torch.DoubleTensor of dimension 5] > e, v = torch.symeig(a, 'V') > e -11.0656 -6.2287 0.8640 8.8655 16.0948 [torch.DoubleTensor of dimension 5] > v -0.2981 -0.6075 0.4026 -0.3745 0.4896 -0.5078 -0.2880 -0.4066 -0.3572 -0.6053 -0.0816 -0.3843 -0.6600 0.5008 0.3991 -0.0036 -0.4467 0.4553 0.6204 -0.4564 -0.8041 0.4480 0.1725 0.3108 0.1622 [torch.DoubleTensor of dimension 5x5] > v*torch.diag(e)*v:t() 1.9600 -6.4900 -0.4700 -7.2000 -0.6500 -6.4900 3.8000 -6.3900 1.5000 -6.3400 -0.4700 -6.3900 4.1700 -1.5100 2.6700 -7.2000 1.5000 -1.5100 5.7000 1.8000 -0.6500 -6.3400 2.6700 1.8000 -7.1000 [torch.DoubleTensor of dimension 5x5] > a:dist(torch.triu(v*torch.diag(e)*v:t())) 1.0219480822443e-14 ``` ### torch.eig([rese, resv,] a [, 'N' or 'V']) ### `e, V = torch.eig(A)` returns eigenvalues and eigenvectors of a general real square matrix `A`. `A` and `V` are `m × m` matrices and `e` is a `m` dimensional vector. This function calculates all right eigenvalues (and vectors) of `A` such that `A = V diag(e) V'`. Third argument defines computation of eigenvectors or eigenvalues only. If it is `'N'`, only eigenvalues are computed. If it is `'V'`, both eigenvalues and eigenvectors are computed. The eigen values returned follow [LAPACK convention](https://software.intel.com/sites/products/documentation/hpc/mkl/mklman/GUID-16EB5901-5644-4DA6-A332-A052309010C4.htm) and are returned as complex (real/imaginary) pairs of numbers (`2 * m` dimensional `Tensor`). 
Note: Irrespective of the original strides, the returned matrix `V` will be transposed, i.e. with strides `1, m` instead of `m, 1`. ```lua > a = torch.Tensor({{ 1.96, 0.00, 0.00, 0.00, 0.00}, {-6.49, 3.80, 0.00, 0.00, 0.00}, {-0.47, -6.39, 4.17, 0.00, 0.00}, {-7.20, 1.50, -1.51, 5.70, 0.00}, {-0.65, -6.34, 2.67, 1.80, -7.10}}):t() > a 1.9600 -6.4900 -0.4700 -7.2000 -0.6500 0.0000 3.8000 -6.3900 1.5000 -6.3400 0.0000 0.0000 4.1700 -1.5100 2.6700 0.0000 0.0000 0.0000 5.7000 1.8000 0.0000 0.0000 0.0000 0.0000 -7.1000 [torch.DoubleTensor of dimension 5x5] > b = a + torch.triu(a, 1):t() > b 1.9600 -6.4900 -0.4700 -7.2000 -0.6500 -6.4900 3.8000 -6.3900 1.5000 -6.3400 -0.4700 -6.3900 4.1700 -1.5100 2.6700 -7.2000 1.5000 -1.5100 5.7000 1.8000 -0.6500 -6.3400 2.6700 1.8000 -7.1000 [torch.DoubleTensor of dimension 5x5] > e = torch.eig(b) > e 16.0948 0.0000 -11.0656 0.0000 -6.2287 0.0000 0.8640 0.0000 8.8655 0.0000 [torch.DoubleTensor of dimension 5x2] > e, v = torch.eig(b, 'V') > e 16.0948 0.0000 -11.0656 0.0000 -6.2287 0.0000 0.8640 0.0000 8.8655 0.0000 [torch.DoubleTensor of dimension 5x2] > v -0.4896 0.2981 -0.6075 -0.4026 -0.3745 0.6053 0.5078 -0.2880 0.4066 -0.3572 -0.3991 0.0816 -0.3843 0.6600 0.5008 0.4564 0.0036 -0.4467 -0.4553 0.6204 -0.1622 0.8041 0.4480 -0.1725 0.3108 [torch.DoubleTensor of dimension 5x5] > v * torch.diag(e:select(2, 1))*v:t() 1.9600 -6.4900 -0.4700 -7.2000 -0.6500 -6.4900 3.8000 -6.3900 1.5000 -6.3400 -0.4700 -6.3900 4.1700 -1.5100 2.6700 -7.2000 1.5000 -1.5100 5.7000 1.8000 -0.6500 -6.3400 2.6700 1.8000 -7.1000 [torch.DoubleTensor of dimension 5x5] > b:dist(v * torch.diag(e:select(2, 1)) * v:t()) 3.5423944346685e-14 ``` ### torch.svd([resu, ress, resv,] a [, 'S' or 'A']) ### `U, S, V = torch.svd(A)` returns the singular value decomposition of a real matrix `A` of size `n × m` such that `A = USV'*`. `U` is `n × n`, `S` is `n × m` and `V` is `m × m`. The last argument, if it is string, represents the number of singular values to be computed. 
`'S'` stands for *some* and `'A'` stands for *all*. Note: Irrespective of the original strides, the returned matrix `U` will be transposed, i.e. with strides `1, n` instead of `n, 1`. ```lua > a = torch.Tensor({{8.79, 6.11, -9.15, 9.57, -3.49, 9.84}, {9.93, 6.91, -7.93, 1.64, 4.02, 0.15}, {9.83, 5.04, 4.86, 8.83, 9.80, -8.99}, {5.45, -0.27, 4.85, 0.74, 10.00, -6.02}, {3.16, 7.98, 3.01, 5.80, 4.27, -5.31}}):t() > a 8.7900 9.9300 9.8300 5.4500 3.1600 6.1100 6.9100 5.0400 -0.2700 7.9800 -9.1500 -7.9300 4.8600 4.8500 3.0100 9.5700 1.6400 8.8300 0.7400 5.8000 -3.4900 4.0200 9.8000 10.0000 4.2700 9.8400 0.1500 -8.9900 -6.0200 -5.3100 > u, s, v = torch.svd(a) > u -0.5911 0.2632 0.3554 0.3143 0.2299 -0.3976 0.2438 -0.2224 -0.7535 -0.3636 -0.0335 -0.6003 -0.4508 0.2334 -0.3055 -0.4297 0.2362 -0.6859 0.3319 0.1649 -0.4697 -0.3509 0.3874 0.1587 -0.5183 0.2934 0.5763 -0.0209 0.3791 -0.6526 [torch.DoubleTensor of dimension 6x5] > s 27.4687 22.6432 8.5584 5.9857 2.0149 [torch.DoubleTensor of dimension 5] > v -0.2514 0.8148 -0.2606 0.3967 -0.2180 -0.3968 0.3587 0.7008 -0.4507 0.1402 -0.6922 -0.2489 -0.2208 0.2513 0.5891 -0.3662 -0.3686 0.3859 0.4342 -0.6265 -0.4076 -0.0980 -0.4933 -0.6227 -0.4396 [torch.DoubleTensor of dimension 5x5] > u * torch.diag(s) * v:t() 8.7900 9.9300 9.8300 5.4500 3.1600 6.1100 6.9100 5.0400 -0.2700 7.9800 -9.1500 -7.9300 4.8600 4.8500 3.0100 9.5700 1.6400 8.8300 0.7400 5.8000 -3.4900 4.0200 9.8000 10.0000 4.2700 9.8400 0.1500 -8.9900 -6.0200 -5.3100 [torch.DoubleTensor of dimension 6x5] > a:dist(u * torch.diag(s) * v:t()) 2.8923773593204e-14 ``` ### torch.inverse([res,] x) ### Computes the inverse of square matrix `x`. `torch.inverse(x)` returns the result as a new matrix. `torch.inverse(y, x)` puts the result in `y`. Note: Irrespective of the original strides, the returned matrix `y` will be transposed, i.e. with strides `1, m` instead of `m, 1`. 
```lua > x = torch.rand(10, 10) > y = torch.inverse(x) > z = x * y > z 1.0000 -0.0000 0.0000 -0.0000 0.0000 0.0000 0.0000 -0.0000 0.0000 0.0000 0.0000 1.0000 -0.0000 -0.0000 0.0000 0.0000 -0.0000 -0.0000 -0.0000 0.0000 0.0000 -0.0000 1.0000 -0.0000 0.0000 0.0000 -0.0000 -0.0000 0.0000 0.0000 0.0000 -0.0000 -0.0000 1.0000 -0.0000 0.0000 0.0000 -0.0000 -0.0000 0.0000 0.0000 -0.0000 0.0000 -0.0000 1.0000 0.0000 0.0000 -0.0000 -0.0000 0.0000 0.0000 -0.0000 0.0000 -0.0000 0.0000 1.0000 0.0000 -0.0000 -0.0000 0.0000 0.0000 -0.0000 0.0000 -0.0000 0.0000 0.0000 1.0000 -0.0000 0.0000 0.0000 0.0000 -0.0000 -0.0000 -0.0000 0.0000 0.0000 0.0000 1.0000 0.0000 0.0000 0.0000 -0.0000 -0.0000 -0.0000 0.0000 0.0000 -0.0000 -0.0000 1.0000 0.0000 0.0000 -0.0000 0.0000 -0.0000 0.0000 0.0000 0.0000 -0.0000 0.0000 1.0000 [torch.DoubleTensor of dimension 10x10] > torch.max(torch.abs(z - torch.eye(10))) -- Max nonzero 2.3092638912203e-14 ``` ### torch.qr([q, r], x) ### Compute a QR decomposition of the matrix `x`: matrices `q` and `r` such that `x = q * r`, with `q` orthogonal and `r` upper triangular. This returns the thin (reduced) QR factorization. `torch.qr(x)` returns the Q and R components as new matrices. `torch.qr(q, r, x)` stores them in existing `Tensor`s `q` and `r`. Note that precision may be lost if the magnitudes of the elements of `x` are large. Note also that, while it should always give you a valid decomposition, it may not give you the same one across platforms - it will depend on your LAPACK implementation. Note: Irrespective of the original strides, the returned matrix `q` will be transposed, i.e. with strides `1, m` instead of `m, 1`. 
```lua > a = torch.Tensor{{12, -51, 4}, {6, 167, -68}, {-4, 24, -41}} > a 12 -51 4 6 167 -68 -4 24 -41 [torch.DoubleTensor of dimension 3x3] > q, r = torch.qr(a) > q -0.8571 0.3943 0.3314 -0.4286 -0.9029 -0.0343 0.2857 -0.1714 0.9429 [torch.DoubleTensor of dimension 3x3] > r -14.0000 -21.0000 14.0000 0.0000 -175.0000 70.0000 0.0000 0.0000 -35.0000 [torch.DoubleTensor of dimension 3x3] > (q * r):round() 12 -51 4 6 167 -68 -4 24 -41 [torch.DoubleTensor of dimension 3x3] > (q:t() * q):round() 1 0 0 0 1 0 0 0 1 [torch.DoubleTensor of dimension 3x3] ``` ### torch.geqrf([m, tau], a) ### This is a low-level function for calling LAPACK directly. You'll generally want to use `torch.qr()` instead. Computes a QR decomposition of `a`, but without constructing Q and R as explicit separate matrices. Rather, this directly calls the underlying LAPACK function `?geqrf` which produces a sequence of 'elementary reflectors'. See [LAPACK documentation](https://software.intel.com/en-us/node/521004) for further details. ### torch.orgqr([q], m, tau) ### This is a low-level function for calling LAPACK directly. You'll generally want to use `torch.qr()` instead. Constructs a Q matrix from a sequence of elementary reflectors, such as that given by `torch.geqrf`. See [LAPACK documentation](https://software.intel.com/en-us/node/521010) for further details. ### torch.ormqr([res], m, tau, mat [, 'L' or 'R'] [, 'N' or 'T']) ### Multiply a matrix with `Q` as defined by the elementary reflectors and scalar factors returned by `geqrf`. This is a low-level function for calling LAPACK directly. You'll generally want to use `torch.qr()` instead. * `side` (`'L'` or `'R'`) specifies whether `mat` should be left-multiplied, `mat * Q`, or right-multiplied, `Q * mat`. * `trans` (`'N'` or `'T'`) specifies whether `Q` should be transposed before being multiplied. See [LAPACK documentation](https://software.intel.com/en-us/node/521011) for further details. 
## Logical Operations on `Tensor`s ## These functions implement logical comparison operators that take a `Tensor` as input and another `Tensor` or a number as the comparison target. They return a `ByteTensor` in which each element is `0` or `1` indicating if the comparison for the corresponding element was `false` or `true` respectively. ### torch.lt(a, b) ### Implements `<` operator comparing each element in `a` with `b` (if `b` is a number) or each element in `a` with corresponding element in `b`. ### torch.le(a, b) ### Implements `<=` operator comparing each element in `a` with `b` (if `b` is a number) or each element in `a` with corresponding element in `b`. ### torch.gt(a, b) ### Implements `>` operator comparing each element in `a` with `b` (if `b` is a number) or each element in `a` with corresponding element in `b`. ### torch.ge(a, b) ### Implements `>=` operator comparing each element in `a` with `b` (if `b` is a number) or each element in `a` with corresponding element in `b`. ### torch.eq(a, b) ### Implements `==` operator comparing each element in `a` with `b` (if `b` is a number) or each element in `a` with corresponding element in `b`. ### torch.ne(a, b) ### Implements `~=` operator comparing each element in `a` with `b` (if `b` is a number) or each element in `a` with corresponding element in `b`. ### torch.all(a) ### ### torch.any(a) ### Additionally, `any` and `all` logically sum a `ByteTensor` returning `true` if any or all elements are logically true respectively. Note that logically true here is meant in the C sense (zero is `false`, non-zero is `true`) such as the output of the `Tensor` element-wise logical operations. 
```lua > a = torch.rand(10) > b = torch.rand(10) > a 0.5694 0.5264 0.3041 0.4159 0.1677 0.7964 0.0257 0.2093 0.6564 0.0740 [torch.DoubleTensor of dimension 10] > b 0.2950 0.4867 0.9133 0.1291 0.1811 0.3921 0.7750 0.3259 0.2263 0.1737 [torch.DoubleTensor of dimension 10] > torch.lt(a, b) 0 0 1 0 1 0 1 1 0 1 [torch.ByteTensor of dimension 10] > torch.eq(a, b) 0 0 0 0 0 0 0 0 0 0 [torch.ByteTensor of dimension 10] > torch.ne(a, b) 1 1 1 1 1 1 1 1 1 1 [torch.ByteTensor of dimension 10] > torch.gt(a, b) 1 1 0 1 0 1 0 0 1 0 [torch.ByteTensor of dimension 10] > a[torch.gt(a, b)] = 10 > a 10.0000 10.0000 0.3041 10.0000 0.1677 10.0000 0.0257 0.2093 10.0000 0.0740 [torch.DoubleTensor of dimension 10] > a[torch.gt(a, 1)] = -1 > a -1.0000 -1.0000 0.3041 -1.0000 0.1677 -1.0000 0.0257 0.2093 -1.0000 0.0740 [torch.DoubleTensor of dimension 10] > a = torch.ones(3):byte() > torch.all(a) true > a[2] = 0 > torch.all(a) false > torch.any(a) true > a:zero() > torch.any(a) false ``` doc/memoryfile.md000066400000000000000000000030271316246254300143250ustar00rootroot00000000000000 # MemoryFile # Parent classes: [File](file.md) A `MemoryFile` is a particular `File` which is able to perform basic read/write operations on a buffer in `RAM`. It implements all methods described in [File](file.md). The data of the `File` is contained into a `NULL` terminated [CharStorage](storage.md). ### torch.MemoryFile([mode]) ### _Constructor_ which returns a new `MemoryFile` object using `mode`. Valid `mode` are `"r"` (read), `"w"` (write) or `"rw"` (read-write). Default is `"rw"`. ### torch.MemoryFile(storage, mode) ### _Constructor_ which returns a new `MemoryFile` object, using the given [storage](storage.md) (which must be a `CharStorage`) and `mode`. Valid `mode` are `"r"` (read), `"w"` (write) or `"rw"` (read-write). The last character in this storage _must_ be `NULL` or an error will be generated. This allows to read existing memory. 
If used for writing, note that the `storage` might be resized by this class if needed. ### [CharStorage] storage() ### Returns the [storage](storage.md) which contains all the data of the `File` (note: this is _not_ a copy, but a _reference_ on this storage). The size of the storage is the size of the data in the `File`, plus one, the last character being `NULL`. ### longSize([size]) ### Longs will be written and read from the file as `size` bytes long, which can be 0, 4 or 8. 0 means system default. doc/pipefile.md000066400000000000000000000015421316246254300137520ustar00rootroot00000000000000 # PipeFile # Parent classes: [DiskFile](diskfile.md) A `PipeFile` is a particular `File` which is able to perform basic read/write operations on a command pipe. It implements all methods described in [DiskFile](diskfile.md) and [File](file.md). The file might be open in read or write mode, depending on the parameter `mode` (which can take the value `"r"` or `"w"`) given to the [torch.PipeFile(fileName, mode)](#torch.PipeFile). Read-write mode is not allowed. ### torch.PipeFile(command, [mode], [quiet]) ### _Constructor_ which executes `command` by opening a pipe in read or write `mode`. Valid `mode`s are `"r"` (read) or `"w"` (write). Default is read mode. If (and only if) `quiet` is `true`, no error will be raised in case of problem opening the file: instead `nil` will be returned. doc/random.md000066400000000000000000000125611316246254300134400ustar00rootroot00000000000000 # Random Numbers # Torch provides accurate mathematical random generation, based on [Mersenne Twister](http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html) random number generator. ## Generator handling ## All of the below functions, as well as [randn()](maths.md#torch.randn), [rand()](maths.md#torch.rand) and [randperm()](maths.md#torch.randperm), take as optional first argument a random number generator. If this argument is not provided, the default global RNG is used. 
A non-global RNG can be obtained with [Generator()](#torch.Generator). Each RNG has its own state, independent from all other RNG's states. ``` -- Seed the global RNG > torch.manualSeed(0) > torch.random() 2357136044 -- Creates and seed a non-global RNG > gen = torch.Generator() > torch.manualSeed(gen, 0) > torch.random(gen) 2357136044 > torch.random(gen) 2546248239 > torch.random() 2546248239 ``` ## Seed Handling ## The random number generator is provided with a random seed via [seed()](#torch.seed) when torch is being initialized. It can be reinitialized using [seed()](#torch.seed) or [manualSeed()](#torch.manualSeed). Initial seed can be obtained using [initialSeed()](#torch.initialSeed). Setting a particular seed allows the user to (re)-generate a particular sequence of random numbers. Example: ``` > torch.manualSeed(123) > = torch.uniform() 0.69646918727085 > return torch.uniform() 0.71295532141812 > return torch.uniform() 0.28613933874294 > torch.manualSeed(123) > return torch.uniform() 0.69646918727085 > return torch.uniform() 0.71295532141812 > return torch.uniform() 0.28613933874294 > torch.manualSeed(torch.initialSeed()) > return torch.uniform() 0.69646918727085 > return torch.uniform() 0.71295532141812 > return torch.uniform() 0.28613933874294 ``` To regenerate a sequence of random numbers starting from a specific point in the sequence, one can save the state of the random number generator using [getRNGState()](#torch.getRNGState) and then reset the random number generator to that state using [setRNGState()](#torch.setRNGState). 
Example: ``` > torch.manualSeed(123) > = torch.uniform() 0.69646918727085 > s = torch.getRNGState() > return torch.uniform() 0.71295532141812 > return torch.uniform() 0.28613933874294 > torch.setRNGState(s) > return torch.uniform() 0.71295532141812 > return torch.uniform() 0.28613933874294 ``` ### [Generator] Generator() ### Creates a non-global random generator that carries its own state and can be passed as the first argument to any function that generates a random number. ### [number] seed([gen,]) ### Set the seed of the random number generator using `/dev/urandom` (on Windows the time of the computer with granularity of seconds is used). Returns the seed obtained. ### manualSeed([gen,] number) ### Set the seed of the random number generator to the given `number`. ### initialSeed([gen]) ### Returns the initial seed used to initialize the random generator. ### [Tensor] getRNGState([gen]) ### Returns the current state of the random number generator as a torch.ByteTensor. This can then be used to set the state of the RNG so that the same sequence of random numbers is produced. ### [Tensor] setRNGState([gen,] state) ### Sets the state of the random number generator. If `state` was obtained earlier using `getRNGState` then the random number generator should now generate the same numbers as it did from the point where `state` was obtained. This function returns its argument `state`. ### [number] random([gen,] [a], [b]) ### Returns an unsigned 32 bit integer random number from `[a,b]`. By default `a` is `1` and `b` is `2^32`. ### [number] uniform([gen,] [a],[b]) ### Returns a random real number according to uniform distribution on `[a,b)`. By default `a` is `0` and `b` is `1`. ### [number] normal([gen,] [mean],[stdv]) ### Returns a random real number according to a normal distribution with the given `mean` and standard deviation `stdv`. `stdv` must be positive. 
### [number] exponential([gen,] lambda) ### Returns a random real number according to the exponential distribution `p(x) = lambda * exp(-lambda * x)` ### [number] cauchy([gen,] median, sigma) ### Returns a random real number according to the Cauchy distribution `p(x) = sigma/(pi*(sigma^2 + (x-median)^2))` ### [number] logNormal([gen,] mean, stdv) ### Returns a random real number according to the log-normal distribution, with the given `mean` and standard deviation `stdv`. `mean` and `stdv` are the corresponding mean and standard deviation of the underlying normal distribution, and not of the returned distribution. `stdv` must be positive. ### [number] geometric([gen,] p) ### Returns a random integer number according to a geometric distribution `p(i) = (1-p) * p^(i-1)`. `p` must satisfy `0 < p < 1`. ### [number] bernoulli([gen,] [p]) ### Returns `1` with probability `p` and `0` with probability `1-p`. `p` must satisfy `0 <= p <= 1`. By default `p` is equal to `0.5`. doc/serialization.md000066400000000000000000000070531316246254300150350ustar00rootroot00000000000000 # Serialization # Torch provides 4 high-level methods to serialize/deserialize arbitrary Lua/Torch objects. These functions are just abstractions over the [File](file.md) object, and were created for convenience (these are very common routines). The first two functions are useful to serialize/deserialize data to/from files: - `torch.save(filename, object [, format, referenced])` - `[object] torch.load(filename [, format, referenced])` The next two functions are useful to serialize/deserialize data to/from strings: - `[str] torch.serialize(object)` - `[object] torch.deserialize(str)` Serializing to files is useful to save arbitrary data structures, or share them with other people. Serializing to strings is useful to store arbitrary data structures in databases, or 3rd party software. ### torch.save(filename, object [, format, referenced]) ### Writes `object` into a file named `filename`. 
The `format` can be set to `ascii` or `binary` (default is binary). Binary format is platform dependent, but typically more compact and faster to read/write. The ASCII format is platform-independent, and should be used to share data structures across platforms. The option `referenced` specifies if [object references](file.md#torch.File.referenced) should be tracked or not (`true` by default). ``` -- arbitrary object: obj = { mat = torch.randn(10,10), name = '10', test = { entry = 1 } } -- save to disk: torch.save('test.dat', obj) ``` ### [object] torch.load(filename [, format, referenced]) ### Reads `object` from a file named `filename`. The `format` can be set to `ascii`, `binary`, `b32` or `b64` (default is binary). Binary format is platform dependent, but typically more compact and faster to read/write. Use `b32`/`b64`, instead of `binary`, for loading files saved on a 32/64 bit OS. The ASCII format is platform-independent, and may be used to share data structures across platforms. The option `referenced` specifies if [object references](file.md#torch.File.referenced) should be tracked or not (`true` by default). Note that files written with `referenced` at `true` cannot be loaded with `referenced` at `false`. ``` -- given serialized object from section above, reload: obj = torch.load('test.dat') print(obj) -- will print: -- {[mat] = DoubleTensor - size: 10x10 -- [name] = string : "10" -- [test] = table - size: 0} ``` ### [str] torch.serialize(object [, format]) ### Serializes `object` into a string. The `format` can be set to `ascii` or `binary` (default is binary). Binary format is platform dependent, but typically more compact and faster to read/write. The ASCII format is platform-independent, and should be used to share data structures across platforms. 
``` -- arbitrary object: obj = { mat = torch.randn(10,10), name = '10', test = { entry = 1 } } -- serialize: str = torch.serialize(obj) ``` ### [object] torch.deserialize(str [, format]) ### Deserializes `object` from a string. The `format` can be set to `ascii` or `binary` (default is binary). Binary format is platform dependent, but typically more compact and faster to read/write. The ASCII format is platform-independent, and should be used to share data structures across platforms. ``` -- given serialized object from section above, deserialize: obj = torch.deserialize(str) print(obj) -- will print: -- {[mat] = DoubleTensor - size: 10x10 -- [name] = string : "10" -- [test] = table - size: 0} ``` doc/storage.md000066400000000000000000000215321316246254300136220ustar00rootroot00000000000000 # Storage # _Storages_ are basically a way for `Lua` to access memory of a `C` pointer or array. _Storages_ can also [map the contents of a file to memory](#__torch.StorageMap). A `Storage` is an array of _basic_ `C` types. For arrays of `Torch` objects, use the `Lua` tables. Several `Storage` classes for all the basic `C` types exist and have the following self-explanatory names: `ByteStorage`, `CharStorage`, `ShortStorage`, `IntStorage`, `LongStorage`, `FloatStorage`, `DoubleStorage`. Note that `ByteStorage` and `CharStorage` represent both arrays of bytes. `ByteStorage` represents an array of _unsigned_ chars, while `CharStorage` represents an array of _signed_ chars. Conversions between two `Storage` type might be done using `copy`: ```lua x = torch.IntStorage(10):fill(1) y = torch.DoubleStorage(10):copy(x) ``` [Classical storages](#torch.Storage) are [serializable](file.md#torch.File.serialization). [Storages mapping a file](#__torch.StorageMap) are also [serializable](file.md#torch.File.serialization), but _will be saved as a normal storage_. High-level serialization commands are described in the [serialization](serialization.md) section. 
An alias `torch.Storage()` is made over your preferred Storage type, controlled by the [torch.setdefaulttensortype](utility.md#torch.setdefaulttensortype) function. By default, this "points" on `torch.DoubleStorage`. ## Constructors and Access Methods ## ### torch.TYPEStorage([size [, ptr]]) ### Returns a new `Storage` of type `TYPE`. Valid `TYPE` are `Byte`, `Char`, `Short`, `Int`, `Long`, `Float`, and `Double`. If `size` is given, resize the `Storage` accordingly, else create an empty `Storage`. Example: ```lua -- Creates a Storage of 10 double: x = torch.DoubleStorage(10) ``` The data in the `Storage` is _uninitialized_. The optional second argument `ptr` is a number whose value is a pointer to a memory chunk of size `size*sizeof(TYPE)` (for example coming from the [`torch.data()`](tensor.md#result-datatensor-asnumber) method). The caller remains responsible of the memory chunk and must ensure it remains stable as the storage only keeps a pointer to it (the memory is _not_ copied and will _not_ be freed at storage deletion). ### torch.TYPEStorage(table) ### `table` is assumed to be a Lua array of numbers. The constructor returns a new storage of the specified `TYPE`, of the size of the table, containing all the table elements converted Example: ```lua > = torch.IntStorage({1,2,3,4}) 1 2 3 4 [torch.IntStorage of size 4] ``` ### torch.TYPEStorage(storage [, offset [, size]]) ### Returns a new `Storage` of type `TYPE`, which is a view on the first argument. The first argument must be of the same type `TYPE`. An optional `offset` can be provided (defaults to 1). An optional `size` can also be provided to restrict the size of the new storage (defaults to `storage:size()-(offset-1)`). 
Example: ```lua -- Creates a Storage of 10 double: > x = torch.DoubleStorage(10) -- Creates a view on this Storage, starting at offset 3, with a size of 5: > y = torch.DoubleStorage(x, 3, 5) -- Modifying elements of y will modify x: > x:fill(0) > y:fill(1) > print(x) 0 0 1 1 1 1 1 0 0 0 [torch.DoubleStorage of size 10] ``` ### torch.TYPEStorage(filename [, shared [, size [, sharedMem]]]) ### Returns a new kind of `Storage` which maps the contents of the given `filename` to memory. Valid `TYPE` are `Byte`, `Char`, `Short`, `Int`, `Long`, `Float`, and `Double`. If the optional boolean argument `shared` is `true`, the mapped memory is shared amongst all processes on the computer. When `shared` is `true`, the file must be accessible in read-write mode. Any changes on the storage will be written in the file. The changes might be written only after destruction of the storage. When `shared` is `false` (or not provided), the file must be at least readable. Any changes on the storage will not affect the file. Note: changes made on the file after creation of the storage have an unspecified effect on the storage contents. If `size` is specified, it is the [size](#torch.Storage.size) of the returned `Storage` (in elements). In this case, if `shared` is `false` then the file must already contain at least ```lua size*(size of TYPE) ``` bytes. If `shared` is `true` then the file will be created if necessary, and extended if necessary to that many bytes in length. If `size` is not specified then the [size](#torch.Storage.size) of the returned `Storage` will be ```lua (size of file in byte)/(size of TYPE) ``` elements provided a non empty file already exists. If `sharedMem` is true then, the file will be created (or mapped) from the shared memory area using [`shm_open()`](http://linux.die.net/man/3/shm_open). On Linux systems this is implemented at `/dev/shm` partition on RAM for interprocess communication. 
Example: ```lua $ echo "Hello World" > hello.txt $ lua Lua 5.1.3 Copyright (C) 1994-2008 Lua.org, PUC-Rio > require 'torch' > x = torch.CharStorage('hello.txt') > = x 72 101 108 108 111 32 87 111 114 108 100 10 [torch.CharStorage of size 12] > = x:string() Hello World > = x:fill(42):string() ************ > $ cat hello.txt Hello World $ lua Lua 5.1.3 Copyright (C) 1994-2008 Lua.org, PUC-Rio > require 'torch' > x = torch.CharStorage('hello.txt', true) > = x:string() Hello World > x:fill(42) > $ cat hello.txt ************ ``` ### [number] #self ### Returns the number of elements in the storage. Equivalent to [size()](#torch.Storage.size). ### [number] self[index] ### Returns or set the element at position `index` in the storage. Valid range of `index` is 1 to [size()](#torch.Storage.size). Example: ```lua x = torch.DoubleStorage(10) print(x[5]) ``` ### [self] copy(storage) ### Copy another `storage`. The types of the two storages might be different: in that case a conversion of types occur (which might result, of course, in loss of precision or rounding). This method returns self, allowing things like: ```lua x = torch.IntStorage(10):fill(1) y = torch.DoubleStorage(10):copy(x) -- y won't be nil! ``` ### [self] fill(value) ### Fill the `Storage` with the given value. This method returns self, allowing things like: ```lua x = torch.IntStorage(10):fill(0) -- x won't be nil! ``` ### [self] resize(size) ### Resize the storage to the provided `size`. _The new contents are undetermined_. This function returns self, allowing things like: ```lua x = torch.DoubleStorage(10):fill(1) y = torch.DoubleStorage():resize(x:size()):copy(x) -- y won't be nil! ``` ### [number] size() ### Returns the number of elements in the storage. Equivalent to [#](#__torch.StorageSharp). ### [self] string(str) ### This function is available only on `ByteStorage` and `CharStorage`. 
This method resizes the storage to the length of the provided string `str`, and copies the contents of `str` into the storage. The `NULL` terminating character is not copied, but `str` might contain `NULL` characters. The method returns the `Storage`. ```lua > x = torch.CharStorage():string("blah blah") > print(x) 98 108 97 104 32 98 108 97 104 [torch.CharStorage of size 9] ``` ### [string] string() ### This function is available only on `ByteStorage` and `CharStorage`. The contents of the storage viewed as a string are returned. The string might contain `NULL` characters. ```lua > x = torch.CharStorage():string("blah blah") > print(x:string()) blah blah ``` ## Reference counting methods ## Storages are reference-counted. It means that each time an object (C or the Lua state) needs to keep a reference over a storage, the corresponding storage reference counter will be [increased](#torch.Storage.retain). The reference counter is [decreased](#torch.Storage.free) when the object does not need the storage anymore. These methods should be used with extreme care. In general, they should never be called, except if you know what you are doing, as the handling of references is done automatically. They can be useful in threaded environments. Note that these methods are atomic operations. ### retain() ### Increment the reference counter of the storage. ### free() ### Decrement the reference counter of the storage. Free the storage if the counter is at 0. doc/tensor.md000066400000000000000000001612541316246254300134760ustar00rootroot00000000000000 # Tensor # The `Tensor` class is probably the most important class in `Torch`. Almost every package depends on this class. It is *__the__* class for handling numeric data. As with pretty much anything in [Torch7](./index.md), tensors are [serializable](file.md#torch.File.serialization). __Multi-dimensional matrix__ A `Tensor` is a multi-dimensional matrix. 
The number of dimensions is unlimited (up to what can be created using [LongStorage](storage.md)). Example: ```lua --- creation of a 4D-tensor 4x5x6x2 z = torch.Tensor(4,5,6,2) --- for more dimensions, (here a 6D tensor) one can do: s = torch.LongStorage(6) s[1] = 4; s[2] = 5; s[3] = 6; s[4] = 2; s[5] = 7; s[6] = 3; x = torch.Tensor(s) ``` The number of dimensions of a `Tensor` can be queried by [nDimension()](#torch.nDimension) or [dim()](#torch.Tensor.dim). Size of the `i-th` dimension is returned by [size(i)](#torch.Tensor.size). A [LongStorage](storage.md) containing all the dimensions can be returned by [size()](#torch.Tensor.size). ```lua > x:nDimension() 6 > x:size() 4 5 6 2 7 3 [torch.LongStorage of size 6] ``` __Internal data representation__ The actual data of a `Tensor` is contained into a [Storage](storage.md). It can be accessed using [`storage()`](#torch.storage). While the memory of a `Tensor` has to be contained in this unique `Storage`, it might not be contiguous: the first position used in the `Storage` is given by [`storageOffset()`](#torch.storageOffset) (starting at `1`). And the _jump_ needed to go from one element to another element in the `i-th` dimension is given by [`stride(i)`](#torch.Tensor.stride). In other words, given a 3D tensor ```lua x = torch.Tensor(7,7,7) ``` accessing the element `(3,4,5)` can be done by ```lua > x[3][4][5] ``` or equivalently (but slowly!) 
```lua > x:storage()[x:storageOffset() +(3-1)*x:stride(1)+(4-1)*x:stride(2)+(5-1)*x:stride(3)] ``` One could say that a `Tensor` is a particular way of _viewing_ a `Storage`: a `Storage` only represents a chunk of memory, while the `Tensor` interprets this chunk of memory as having dimensions: ```lua x = torch.Tensor(4,5) s = x:storage() for i=1,s:size() do -- fill up the Storage s[i] = i end > x -- s is interpreted by x as a 2D matrix 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 [torch.DoubleTensor of dimension 4x5] ``` Note also that in Torch7 ___elements in the same row___ [elements along the __last__ dimension] are contiguous in memory for a matrix [tensor]: ```lua x = torch.Tensor(4,5) i = 0 x:apply(function() i = i + 1 return i end) > x 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 [torch.DoubleTensor of dimension 4x5] > x:stride() 5 1 -- element in the last dimension are contiguous! [torch.LongStorage of size 2] ``` This is exactly like in C (and not `Fortran`). __Tensors of different types__ Actually, several types of `Tensor` exists: ```lua ByteTensor -- contains unsigned chars CharTensor -- contains signed chars ShortTensor -- contains shorts IntTensor -- contains ints LongTensor -- contains longs FloatTensor -- contains floats DoubleTensor -- contains doubles ``` Most numeric operations are implemented _only_ for `FloatTensor` and `DoubleTensor`. Other Tensor types are useful if you want to save memory space. __Default Tensor type__ For convenience, _an alias_ `torch.Tensor` is provided, which allows the user to write type-independent scripts, which can then ran after choosing the desired Tensor type with a call like ```lua torch.setdefaulttensortype('torch.FloatTensor') ``` See [torch.setdefaulttensortype](utility.md#torch.setdefaulttensortype) for more details. By default, the alias "points" on `torch.DoubleTensor`. __Efficient memory management__ _All_ tensor operations in this class do _not_ make any memory copy. 
All these methods transform the existing tensor, or return a new tensor referencing _the same storage_. This magical behavior is internally obtained by good usage of the [stride()](#torch.Tensor.stride) and [storageOffset()](#torch.storageOffset). Example: ```lua x = torch.Tensor(5):zero() > x 0 0 0 0 0 [torch.DoubleTensor of dimension 5] > x:narrow(1, 2, 3):fill(1) -- narrow() returns a Tensor -- referencing the same Storage as x > x 0 1 1 1 0 [torch.Tensor of dimension 5] ``` If you really need to copy a `Tensor`, you can use the [copy()](#torch.Tensor.copy) method: ```lua y = torch.Tensor(x:size()):copy(x) ``` Or the convenience method ```lua y = x:clone() ``` We now describe all the methods for `Tensor`. If you want to specify the Tensor type, just replace `Tensor` by the name of the Tensor variant (like `CharTensor`). ## Tensor constructors ## Tensor constructors create new Tensor objects, optionally allocating new memory. By default the elements of newly allocated memory are not initialized and therefore might contain arbitrary numbers. Here are several ways to construct a new `Tensor`. ### torch.Tensor() ### Returns an empty tensor. ### torch.Tensor(tensor) ### Returns a new tensor which references the same [Storage](#torch.storage) as the given `tensor`. The [size](#torch.Tensor.size), [stride](#torch.Tensor.stride), and [storage offset](#torch.storageOffset) are the same as those of the given tensor. The new `Tensor` is now going to "view" the same [storage](storage.md) as the given `tensor`. As a result, any modification in the elements of the `Tensor` will have an impact on the elements of the given `tensor`, and vice-versa. No memory copy! 
```lua x = torch.Tensor(2,5):fill(3.14) > x 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 [torch.DoubleTensor of dimension 2x5] y = torch.Tensor(x) > y 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 [torch.DoubleTensor of dimension 2x5] y:zero() > x -- elements of x are the same as y! 0 0 0 0 0 0 0 0 0 0 [torch.DoubleTensor of dimension 2x5] ``` ### torch.Tensor(sz1 [,sz2 [,sz3 [,sz4]]]) ### Create a tensor up to 4 dimensions. The tensor size will be `sz1 x sz2 x sz3 x sz4`. ### torch.Tensor(sizes, [strides]) ### Create a tensor of any number of dimensions. The [LongStorage](storage.md) `sizes` gives the size in each dimension of the tensor. The optional [LongStorage](storage.md) `strides` gives the jump necessary to go from one element to the next one in each dimension. Of course, `sizes` and `strides` must have the same number of elements. If not given, or if some elements of `strides` are _negative_, the [stride()](#torch.Tensor.stride) will be computed such that the tensor is as contiguous as possible in memory. Example, create a 4D 4x4x3x2 tensor: ```lua x = torch.Tensor(torch.LongStorage({4,4,3,2})) ``` Playing with the strides can give some interesting things: ```lua x = torch.Tensor(torch.LongStorage({4}), torch.LongStorage({0})):zero() -- zeroes the tensor x[1] = 1 -- all elements point to the same address! > x 1 1 1 1 [torch.DoubleTensor of dimension 4] ``` Note that _negative strides are not allowed_, and, if given as argument when constructing the Tensor, will be interpreted as _choose the right stride such that the Tensor is contiguous in memory_. Note _this method cannot be used to create `torch.LongTensor`s_. The constructor [from a storage](tensor.md#torchtensorstorage-storageoffset-sizes-strides) will be used: ```lua a = torch.LongStorage({1,2}) -- We have a torch.LongStorage containing the values 1 and 2 -- General case for TYPE ~= Long, e.g. 
for TYPE = Float: b = torch.FloatTensor(a) -- Creates a new torch.FloatTensor with 2 dimensions, the first of size 1 and the second of size 2 > b:size() 1 2 [torch.LongStorage of size 2] -- Special case of torch.LongTensor c = torch.LongTensor(a) -- Creates a new torch.LongTensor that uses a as storage and thus contains the values 1 and 2 > c 1 2 [torch.LongTensor of size 2] ``` ### torch.Tensor(storage, [storageOffset, sizes, [strides]]) ### Returns a tensor which uses the existing [Storage](storage.md) `storage`, starting at position `storageOffset` (>=1). The size of each dimension of the tensor is given by the [LongStorage](storage.md) `sizes`. If only `storage` is provided, it will create a 1D Tensor viewing the all Storage. The jump necessary to go from one element to the next one in each dimension is given by the optional argument [LongStorage](storage.md) `strides`. If not given, or if some elements of `strides` are negative, the [stride()](#torch.Tensor.stride) will be computed such that the tensor is as contiguous as possible in memory. Any modification in the elements of the `Storage` will have an impact on the elements of the new `Tensor`, and vice-versa. There is no memory copy! ```lua -- creates a storage with 10 elements s = torch.Storage(10):fill(1) -- we want to see it as a 2x5 tensor x = torch.Tensor(s, 1, torch.LongStorage{2,5}) > x 1 1 1 1 1 1 1 1 1 1 [torch.DoubleTensor of dimension 2x5] x:zero() > s -- the storage contents have been modified 0 0 0 0 0 0 0 0 0 0 [torch.DoubleStorage of size 10] ``` ### torch.Tensor(storage, [storageOffset, sz1 [, st1 ... [, sz4 [, st4]]]]) ### Convenience constructor (for the previous constructor) assuming a number of dimensions inferior or equal to 4. `szi` is the size in the `i-th` dimension, and `sti` is the stride in the `i-th` dimension. ### torch.Tensor(table) ### The argument is assumed to be a Lua array of numbers. 
The constructor returns a new Tensor of the size of the table, containing all the table elements. The table might be multi-dimensional. Example: ```lua > torch.Tensor({{1,2,3,4}, {5,6,7,8}}) 1 2 3 4 5 6 7 8 [torch.DoubleTensor of dimension 2x4] ``` ## A note on function calls ## The rest of this guide will present many functions that can be used to manipulate tensors. Most functions have been defined so that they can be called flexibly, either in an object-oriented "method call" style i.e. `src:function(...)` or a more "functional" style `torch.function(src, ...)`, where `src` is a tensor. Note that these different invocations may differ in whether they modify the tensor in-place, or create a new tensor. Additionally, some functions can be called in the form `dst:function(src, ...)` which usually suggests that the result of the operation on the `src` tensor will be stored in the tensor `dst`. Further details are given in the individual function definitions, below, but it should be noted that the documentation is currently incomplete in this regard, and readers are encouraged to experiment in an interactive session. ## Cloning ## ### [Tensor] clone() ### Returns a clone of a tensor. The memory is copied. ```lua i = 0 x = torch.Tensor(5):apply(function(x) i = i + 1 return i end) > x 1 2 3 4 5 [torch.DoubleTensor of dimension 5] -- create a clone of x y = x:clone() > y 1 2 3 4 5 [torch.DoubleTensor of dimension 5] -- fill up y with 1 y:fill(1) > y 1 1 1 1 1 [torch.DoubleTensor of dimension 5] -- the contents of x were not changed: > x 1 2 3 4 5 [torch.DoubleTensor of dimension 5] ``` ### [Tensor] contiguous ### * If the given Tensor contents are contiguous in memory, returns the exact same Tensor (no memory copy). * Otherwise (_not contiguous in memory_), returns a [clone](#torch.Tensor.clone) (memory _copy_). 
```lua x = torch.Tensor(2,3):fill(1) > x 1 1 1 1 1 1 [torch.DoubleTensor of dimension 2x3] -- x is contiguous, so y points to the same thing y = x:contiguous():fill(2) > y 2 2 2 2 2 2 [torch.DoubleTensor of dimension 2x3] -- contents of x have been changed > x 2 2 2 2 2 2 [torch.DoubleTensor of dimension 2x3] -- x:t() is not contiguous, so z is a clone z = x:t():contiguous():fill(3.14) > z 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 [torch.DoubleTensor of dimension 3x2] -- contents of x have not been changed > x 2 2 2 2 2 2 [torch.DoubleTensor of dimension 2x3] ``` ### [Tensor or string] type(type) ### __If `type` is `nil`__, returns a string containing the type name of the given tensor. ```lua = torch.Tensor():type() torch.DoubleTensor ``` __If `type` is a string__ describing a Tensor type, and is equal to the given tensor typename, returns the exact same tensor (_no memory copy_). ```lua x = torch.Tensor(3):fill(3.14) > x 3.1400 3.1400 3.1400 [torch.DoubleTensor of dimension 3] y = x:type('torch.DoubleTensor') > y 3.1400 3.1400 3.1400 [torch.DoubleTensor of dimension 3] -- zero y contents y:zero() -- contents of x have been changed > x 0 0 0 [torch.DoubleTensor of dimension 3] ``` __If `type` is a string__ describing a Tensor type, different from the type name of the given Tensor, returns a new Tensor of the specified type, whose contents correspond to the contents of the original Tensor, cast to the given type (_memory copy occurs, with possible loss of precision_). ```lua x = torch.Tensor(3):fill(3.14) > x 3.1400 3.1400 3.1400 [torch.DoubleTensor of dimension 3] y = x:type('torch.IntTensor') > y 3 3 3 [torch.IntTensor of dimension 3] ``` ### [Tensor] typeAs(tensor) ### Convenience method for the [type](#torch.type) method. Equivalent to ```lua type(tensor:type()) ``` ### [boolean] isTensor(object) ### Returns `true` iff the provided `object` is one of the `torch.*Tensor` types. 
```lua > torch.isTensor(torch.randn(3,4)) true > torch.isTensor(torch.randn(3,4)[1]) true > torch.isTensor(torch.randn(3,4)[1][2]) false ``` ### [Tensor] byte(), char(), short(), int(), long(), float(), double() ### Convenience methods for the [type](#torch.type) method. For example: ```lua x = torch.Tensor(3):fill(3.14) > x 3.1400 3.1400 3.1400 [torch.DoubleTensor of dimension 3] -- calling type('torch.IntTensor') > x:type('torch.IntTensor') 3 3 3 [torch.IntTensor of dimension 3] -- is equivalent to calling int() > x:int() 3 3 3 [torch.IntTensor of dimension 3] ``` ## Querying the size and structure ## ### [number] nDimension() ### Returns the number of dimensions in a `Tensor`. ```lua x = torch.Tensor(4,5) -- a matrix > x:nDimension() 2 ``` ### [number] dim() ### Same as [nDimension()](#torch.nDimension). ### [number] size(dim) ### Returns the size of the specified dimension `dim`. Example: ```lua x = torch.Tensor(4,5):zero() > x 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 [torch.DoubleTensor of dimension 4x5] > x:size(2) -- gets the number of columns 5 ``` ### [LongStorage] size() ### Returns a [LongStorage](storage.md) containing the size of each dimension of the tensor. ```lua x = torch.Tensor(4,5):zero() > x 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 [torch.DoubleTensor of dimension 4x5] > x:size() 4 5 [torch.LongStorage of size 2] ``` ### [LongStorage] #self ### Same as [size()](#torch.Tensor.size) method. ### [number] stride(dim) ### Returns the jump necessary to go from one element to the next one in the specified dimension `dim`. 
Example: ```lua x = torch.Tensor(4,5):zero() > x 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 [torch.DoubleTensor of dimension 4x5] -- elements in a row are contiguous in memory > x:stride(2) 1 -- to go from one element to the next one in a column -- we need here to jump the size of the row > x:stride(1) 5 ``` Note also that in `Torch` _elements in the same row_ [elements along the __last__ dimension] are contiguous in memory for a matrix [tensor]. ### [LongStorage] stride() ### Returns the jump necessary to go from one element to the next one in each dimension. Example: ```lua x = torch.Tensor(4,5):zero() > x 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 [torch.DoubleTensor of dimension 4x5] > x:stride() 5 1 -- elements are contiguous in a row [last dimension] [torch.LongStorage of size 2] ``` Note also that in `Torch` _elements in the same row_ [elements along the __last__ dimension] are contiguous in memory for a matrix [tensor]. ### [Storage] storage() ### Returns the [Storage](storage.md) used to store all the elements of the `Tensor`. Basically, a `Tensor` is a particular way of _viewing_ a `Storage`. ```lua x = torch.Tensor(4,5) s = x:storage() for i=1,s:size() do -- fill up the Storage s[i] = i end > x -- s is interpreted by x as a 2D matrix 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 [torch.DoubleTensor of dimension 4x5] ``` ### [boolean] isContiguous() ### Returns `true` iff the elements of the `Tensor` are contiguous in memory. ```lua -- normal tensors are contiguous in memory x = torch.randn(4,5) > x:isContiguous() true -- y now "views" the 3rd column of x -- the storage of y is the same than x -- so the memory cannot be contiguous y = x:select(2, 3) > y:isContiguous() false -- indeed, to jump to one element to -- the next one, the stride is 5 > y:stride() 5 [torch.LongStorage of size 1] ``` ### [boolean] isSize(storage) ### Returns `true` iff the dimensions of the `Tensor` match the elements of the `storage`. 
```lua x = torch.Tensor(4,5) y = torch.LongStorage({4,5}) z = torch.LongStorage({5,4,1}) > x:isSize(y) true > x:isSize(z) false > x:isSize(x:size()) true ``` ### [boolean] isSameSizeAs(tensor) ### Returns `true` iff the dimensions of the `Tensor` and the argument `Tensor` are exactly the same. ```lua x = torch.Tensor(4,5) y = torch.Tensor(4,5) > x:isSameSizeAs(y) true y = torch.Tensor(4,6) > x:isSameSizeAs(y) false ``` ### [number] nElement() ### Returns the number of elements of a tensor. ```lua x = torch.Tensor(4,5) > x:nElement() -- 4x5 = 20! 20 ``` ### [number] storageOffset() ### Return the first index (starting at 1) used in the tensor's [storage](#torch.storage). ## Querying elements ## Elements of a tensor can be retrieved with the `[index]` operator. If `index` is a number, `[index]` operator is equivalent to a [`select(1, index)`](#torch.Tensor.select). If the tensor has more than one dimension, this operation returns a slice of the tensor that shares the same underlying storage. If the tensor is a 1D tensor, it returns the value at `index` in this tensor. If `index` is a table, the table must contain _n_ numbers, where _n_ is the [number of dimensions](#torch.nDimension) of the Tensor. It will return the element at the given position. In the same spirit, `index` might be a [LongStorage](storage.md), specifying the position (in the Tensor) of the element to be retrieved. If `index` is a `ByteTensor` in which each element is 0 or 1 then it acts as a selection mask used to extract a subset of the original tensor. This is particularly useful with [logical operators](maths.md#logical-operations-on-tensors) like [`torch.le`](maths.md#torchlea-b). 
Example: ```lua x = torch.Tensor(3,3) i = 0; x:apply(function() i = i + 1; return i end) > x 1 2 3 4 5 6 7 8 9 [torch.DoubleTensor of dimension 3x3] > x[2] -- returns row 2 4 5 6 [torch.DoubleTensor of dimension 3] > x[2][3] -- returns row 2, column 3 6 > x[{2,3}] -- another way to return row 2, column 3 6 > x[torch.LongStorage{2,3}] -- yet another way to return row 2, column 3 6 > x[torch.le(x,3)] -- torch.le returns a ByteTensor that acts as a mask 1 2 3 [torch.DoubleTensor of dimension 3] ``` ## Referencing a tensor to an existing tensor or chunk of memory ## A `Tensor` being a way of _viewing_ a [Storage](storage.md), it is possible to "set" a `Tensor` such that it views an existing [Storage](storage.md). Note that if you want to perform a set on an empty `Tensor` like ```lua y = torch.Storage(10) x = torch.Tensor() x:set(y, 1, 10) ``` you might want in that case to use one of the [equivalent constructor](#torch.Tensor). ```lua y = torch.Storage(10) x = torch.Tensor(y, 1, 10) ``` ### [self] set(tensor) ### The `Tensor` is now going to "view" the same [storage](#torch.storage) as the given `tensor`. As the result, any modification in the elements of the `Tensor` will have an impact on the elements of the given `tensor`, and vice-versa. This is an efficient method, as there is no memory copy! ```lua x = torch.Tensor(2,5):fill(3.14) > x 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 [torch.DoubleTensor of dimension 2x5] y = torch.Tensor():set(x) > y 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 3.1400 [torch.DoubleTensor of dimension 2x5] y:zero() > x -- elements of x are the same than y! 0 0 0 0 0 0 0 0 0 0 [torch.DoubleTensor of dimension 2x5] ``` ### [boolean] isSetTo(tensor) ### Returns true iff the `Tensor` is set to the argument `Tensor`. Note: this is only true if the tensors are the same size, have the same strides and share the same storage and offset. 
```lua x = torch.Tensor(2,5) y = torch.Tensor() > y:isSetTo(x) false > y:set(x) > y:isSetTo(x) true > y:t():isSetTo(x) false -- x and y have different strides ``` ### [self] set(storage, [storageOffset, sizes, [strides]]) ### The `Tensor` is now going to "view" the given [`storage`](storage.md), starting at position `storageOffset` (>=1) with the given [dimension `sizes`](#torch.Tensor.size) and the optional given [`strides`](#torch.Tensor.stride). As the result, any modification in the elements of the `Storage` will have a impact on the elements of the `Tensor`, and vice-versa. This is an efficient method, as there is no memory copy! If only `storage` is provided, the whole storage will be viewed as a 1D Tensor. ```lua -- creates a storage with 10 elements s = torch.Storage(10):fill(1) -- we want to see it as a 2x5 tensor sz = torch.LongStorage({2,5}) x = torch.Tensor() x:set(s, 1, sz) > x 1 1 1 1 1 1 1 1 1 1 [torch.DoubleTensor of dimension 2x5] x:zero() > s -- the storage contents have been modified 0 0 0 0 0 0 0 0 0 0 [torch.DoubleStorage of size 10] ``` ### [self] set(storage, [storageOffset, sz1 [, st1 ... [, sz4 [, st4]]]]) ### This is a "shortcut" for previous method. It works up to 4 dimensions. `szi` is the size of the `i`-th dimension of the tensor. `sti` is the stride in the `i`-th dimension. ## Copying and initializing ## ### [self] copy(tensor) ### Replace the elements of the `Tensor` by copying the elements of the given `tensor`. The [number of elements](#torch.Tensor.nElement) must match, but the sizes might be different. ```lua x = torch.Tensor(4):fill(1) y = torch.Tensor(2,2):copy(x) > x 1 1 1 1 [torch.DoubleTensor of dimension 4] > y 1 1 1 1 [torch.DoubleTensor of dimension 2x2] ``` If a different type of `tensor` is given, then a type conversion occurs, which, of course, might result in loss of precision. ### [self] fill(value) ### Fill the tensor with the given `value`. 
```lua > torch.DoubleTensor(4):fill(3.14) 3.1400 3.1400 3.1400 3.1400 [torch.DoubleTensor of dimension 4] ``` ### [self] zero() ### Fill the tensor with zeros. ```lua > torch.Tensor(4):zero() 0 0 0 0 [torch.DoubleTensor of dimension 4] ``` ## Resizing ## __When resizing to a larger size__, the underlying [Storage](storage.md) is resized to fit all the elements of the `Tensor`. __When resizing to a smaller size__, the underlying [Storage](storage.md) is not resized. __Important note:__ the content of a `Tensor` after resizing is _undetermined_ as [strides](#torch.Tensor.stride) might have been completely changed. In particular, _the elements of the resized tensor are contiguous in memory_. ### [self] resizeAs(tensor) ### Resize the `tensor` as the given `tensor` (of the same type). ### [self] resize(sizes) ### Resize the `tensor` according to the given [LongStorage](storage.md) `sizes`. ### [self] resize(sz1 [,sz2 [,sz3 [,sz4]]]) ### Convenience method of the previous method, working for a number of dimensions up to 4. ## Extracting sub-tensors ## Each of these methods returns a `Tensor` which is a sub-tensor of the given tensor. For methods `narrow`, `select` and `sub` the returned tensor _shares the same `Storage`_ as the original. Hence, any modification in the memory of the sub-tensor will have an impact on the primary tensor, and vice-versa. These methods are very fast, as they do not involve any memory copy. For all other methods in this section such as `index`, `indexCopy` etc., since you cannot extract a shared subtensor (technically), a new tensor is returned. If you make changes in this new tensor, they are not reflected in the original tensor. ### [self] narrow(dim, index, size) ### Returns a new `Tensor` which is a narrowed version of the current one: the dimension `dim` is narrowed from `index` to `index+size-1`. 
```lua x = torch.Tensor(5, 6):zero() > x 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 [torch.DoubleTensor of dimension 5x6] y = x:narrow(1, 2, 3) -- narrow dimension 1 from index 2 to index 2+3-1 y:fill(1) -- fill with 1 > y 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [torch.DoubleTensor of dimension 3x6] > x -- memory in x has been modified! 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 [torch.DoubleTensor of dimension 5x6] ``` ### [Tensor] sub(dim1s, dim1e ... [, dim4s [, dim4e]]) ### This method is equivalent to do a series of [narrow](#torch.Tensor.narrow) up to the first 4 dimensions. It returns a new `Tensor` which is a sub-tensor going from index `dimis` to `dimie` in the `i`-th dimension. Negative values are interpreted index starting from the end: `-1` is the last index, `-2` is the index before the last index, ... ```lua x = torch.Tensor(5, 6):zero() > x 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 [torch.DoubleTensor of dimension 5x6] y = x:sub(2,4):fill(1) -- y is sub-tensor of x: > y -- dimension 1 starts at index 2, ends at index 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [torch.DoubleTensor of dimension 3x6] > x -- x has been modified! 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 [torch.DoubleTensor of dimension 5x6] z = x:sub(2,4,3,4):fill(2) -- we now take a new sub-tensor > z -- dimension 1 starts at index 2, ends at index 4 -- dimension 2 starts at index 3, ends at index 4 2 2 2 2 2 2 [torch.DoubleTensor of dimension 3x2] > x -- x has been modified 0 0 0 0 0 0 1 1 2 2 1 1 1 1 2 2 1 1 1 1 2 2 1 1 0 0 0 0 0 0 [torch.DoubleTensor of dimension 5x6] > y -- y has been modified 1 1 2 2 1 1 1 1 2 2 1 1 1 1 2 2 1 1 [torch.DoubleTensor of dimension 3x6] > y:sub(-1, -1, 3, 4) -- negative values = bounds 2 2 [torch.DoubleTensor of dimension 1x2] ``` ### [Tensor] select(dim, index) ### Returns a new `Tensor` which is a tensor slice at the given `index` in the dimension `dim`. 
The returned tensor has one less dimension: the dimension `dim` is removed. As a result, it is not possible to `select()` on a 1D tensor. Note that "selecting" on the first dimension is equivalent to use the [[] operator](#torch.__index__ ) ```lua x = torch.Tensor(5,6):zero() > x 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 [torch.DoubleTensor of dimension 5x6] y = x:select(1, 2):fill(2) -- select row 2 and fill up > y 2 2 2 2 2 2 [torch.DoubleTensor of dimension 6] > x 0 0 0 0 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 [torch.DoubleTensor of dimension 5x6] z = x:select(2,5):fill(5) -- select column 5 and fill up > z 5 5 5 5 5 [torch.DoubleTensor of dimension 5] > x 0 0 0 0 5 0 2 2 2 2 5 2 0 0 0 0 5 0 0 0 0 0 5 0 0 0 0 0 5 0 [torch.DoubleTensor of dimension 5x6] ``` ### [Tensor] [{ dim1,dim2,... }] or [{ {dim1s,dim1e}, {dim2s,dim2e} }] ### The indexing operator [] can be used to combine narrow/sub and select in a concise and efficient way. It can also be used to copy, and fill (sub) tensors. This operator also works with an input mask made of a `ByteTensor` with 0 and 1 elements, e.g with a [logical operator](maths.md#logical-operations-on-tensors). 
```lua x = torch.Tensor(5, 6):zero() > x 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 [torch.DoubleTensor of dimension 5x6] x[{ 1,3 }] = 1 -- sets element at (i=1,j=3) to 1 > x 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 [torch.DoubleTensor of dimension 5x6] x[{ 2,{2,4} }] = 2 -- sets a slice of 3 elements to 2 > x 0 0 1 0 0 0 0 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 [torch.DoubleTensor of dimension 5x6] x[{ {},4 }] = -1 -- sets the full 4th column to -1 > x 0 0 1 -1 0 0 0 2 2 -1 0 0 0 0 0 -1 0 0 0 0 0 -1 0 0 0 0 0 -1 0 0 [torch.DoubleTensor of dimension 5x6] x[{ {},2 }] = torch.range(1,5) -- copy a 1D tensor to a slice of x > x 0 1 1 -1 0 0 0 2 2 -1 0 0 0 3 0 -1 0 0 0 4 0 -1 0 0 0 5 0 -1 0 0 [torch.DoubleTensor of dimension 5x6] x[torch.lt(x,0)] = -2 -- sets all negative elements to -2 via a mask > x 0 1 1 -2 0 0 0 2 2 -2 0 0 0 3 0 -2 0 0 0 4 0 -2 0 0 0 5 0 -2 0 0 [torch.DoubleTensor of dimension 5x6] ``` ### [Tensor] index(dim, index) ### Returns a new `Tensor` which indexes the original `Tensor` along dimension `dim` using the entries in `torch.LongTensor` `index`. The returned `Tensor` has the same number of dimensions as the original `Tensor`. The returned `Tensor` does __not__ use the same storage as the original `Tensor` -- see below for storing the result in an existing `Tensor`. 
```lua x = torch.rand(5,5) > x 0.8020 0.7246 0.1204 0.3419 0.4385 0.0369 0.4158 0.0985 0.3024 0.8186 0.2746 0.9362 0.2546 0.8586 0.6674 0.7473 0.9028 0.1046 0.9085 0.6622 0.1412 0.6784 0.1624 0.8113 0.3949 [torch.DoubleTensor of dimension 5x5] y = x:index(1,torch.LongTensor{3,1}) > y 0.2746 0.9362 0.2546 0.8586 0.6674 0.8020 0.7246 0.1204 0.3419 0.4385 [torch.DoubleTensor of dimension 2x5] y:fill(1) > y 1 1 1 1 1 1 1 1 1 1 [torch.DoubleTensor of dimension 2x5] > x 0.8020 0.7246 0.1204 0.3419 0.4385 0.0369 0.4158 0.0985 0.3024 0.8186 0.2746 0.9362 0.2546 0.8586 0.6674 0.7473 0.9028 0.1046 0.9085 0.6622 0.1412 0.6784 0.1624 0.8113 0.3949 [torch.DoubleTensor of dimension 5x5] ``` Note the explicit `index` function is different than the indexing operator `[]`. The indexing operator `[]` is a syntactic shortcut for a series of select and narrow operations, therefore it always returns a new view on the original tensor that shares the same storage. However, the explicit `index` function can not use the same storage. It is possible to store the result into an existing Tensor with `result:index(source, ...)`: ```lua x = torch.rand(5,5) > x 0.8020 0.7246 0.1204 0.3419 0.4385 0.0369 0.4158 0.0985 0.3024 0.8186 0.2746 0.9362 0.2546 0.8586 0.6674 0.7473 0.9028 0.1046 0.9085 0.6622 0.1412 0.6784 0.1624 0.8113 0.3949 [torch.DoubleTensor of dimension 5x5] y = torch.Tensor() y:index(x,1,torch.LongTensor{3,1}) > y 0.2746 0.9362 0.2546 0.8586 0.6674 0.8020 0.7246 0.1204 0.3419 0.4385 [torch.DoubleTensor of dimension 2x5] ``` ### [Tensor] indexCopy(dim, index, tensor) ### Copies the elements of `tensor` into the original tensor by selecting the indices in the order given in `index`. The shape of `tensor` must exactly match the elements indexed or an error will be thrown. 
```lua > x 0.8020 0.7246 0.1204 0.3419 0.4385 0.0369 0.4158 0.0985 0.3024 0.8186 0.2746 0.9362 0.2546 0.8586 0.6674 0.7473 0.9028 0.1046 0.9085 0.6622 0.1412 0.6784 0.1624 0.8113 0.3949 [torch.DoubleTensor of dimension 5x5] z=torch.Tensor(5,2) z:select(2,1):fill(-1) z:select(2,2):fill(-2) > z -1 -2 -1 -2 -1 -2 -1 -2 -1 -2 [torch.DoubleTensor of dimension 5x2] x:indexCopy(2,torch.LongTensor{5,1},z) > x -2.0000 0.7246 0.1204 0.3419 -1.0000 -2.0000 0.4158 0.0985 0.3024 -1.0000 -2.0000 0.9362 0.2546 0.8586 -1.0000 -2.0000 0.9028 0.1046 0.9085 -1.0000 -2.0000 0.6784 0.1624 0.8113 -1.0000 [torch.DoubleTensor of dimension 5x5] ``` ### [Tensor] indexAdd(dim, index, tensor) ### Accumulate the elements of `tensor` into the original tensor by adding to the indices in the order given in `index`. The shape of `tensor` must exactly match the elements indexed or an error will be thrown. ```lua Example 1 > x -2.1742 0.5688 -1.0201 0.1383 1.0504 0.0970 0.2169 0.1324 0.9553 -1.9518 -0.7607 0.8947 0.1658 -0.2181 -2.1237 -1.4099 0.2342 0.4549 0.6316 -0.2608 0.0349 0.4713 0.0050 0.1677 0.2103 [torch.DoubleTensor of size 5x5] z=torch.Tensor(5, 2) z:select(2,1):fill(-1) z:select(2,2):fill(-2) > z -1 -2 -1 -2 -1 -2 -1 -2 -1 -2 [torch.DoubleTensor of dimension 5x2] > x:indexAdd(2,torch.LongTensor{5,1},z) > x -4.1742 0.5688 -1.0201 0.1383 0.0504 -1.9030 0.2169 0.1324 0.9553 -2.9518 -2.7607 0.8947 0.1658 -0.2181 -3.1237 -3.4099 0.2342 0.4549 0.6316 -1.2608 -1.9651 0.4713 0.0050 0.1677 -0.7897 [torch.DoubleTensor of size 5x5] Example 2 > a = torch.range(1, 5) > a 1 2 3 4 5 [torch.DoubleTensor of size 5] > a:indexAdd(1, torch.LongTensor{1, 1, 3, 3}, torch.range(1, 4)) > a 4 2 10 4 5 [torch.DoubleTensor of size 5] ``` ### [Tensor] indexFill(dim, index, val) ### Fills the elements of the original `Tensor` with value `val` by selecting the indices in the order given in `index`. 
```lua x=torch.rand(5,5) > x 0.8414 0.4121 0.3934 0.5600 0.5403 0.3029 0.2040 0.7893 0.6079 0.6334 0.3743 0.1389 0.1573 0.1357 0.8460 0.2838 0.9925 0.0076 0.7220 0.5185 0.8739 0.6887 0.4271 0.0385 0.9116 [torch.DoubleTensor of dimension 5x5] x:indexFill(2,torch.LongTensor{4,2},-10) > x 0.8414 -10.0000 0.3934 -10.0000 0.5403 0.3029 -10.0000 0.7893 -10.0000 0.6334 0.3743 -10.0000 0.1573 -10.0000 0.8460 0.2838 -10.0000 0.0076 -10.0000 0.5185 0.8739 -10.0000 0.4271 -10.0000 0.9116 [torch.DoubleTensor of dimension 5x5] ``` ### [Tensor] gather(dim, index) ### Creates a new `Tensor` from the original tensor by gathering a number of values from each "row", where the rows are along the dimension `dim`. The values in a `LongTensor`, passed as `index`, specify which values to take from each row. Specifically, the resulting `Tensor`, which will have the same size as the `index` tensor, is given by ```lua -- dim = 1 result[i][j][k]... = src[index[i][j][k]...][j][k]... -- dim = 2 result[i][j][k]... = src[i][index[i][j][k]...][k]... -- etc. ``` where `src` is the original `Tensor`. The same number of values are selected from each row, and the same value cannot be selected from a row more than once. The values in the `index` tensor must not be larger than the length of the row, that is they must be between 1 and `src:size(dim)` inclusive. It can be somewhat confusing to ensure that the `index` tensor has the correct shape. Viewed pictorially: ![The gather operation](gather.png) Numerically, to give an example, if `src` has size `n x m x p x q`, we are gathering along `dim = 3`, and we wish to gather `k` elements from each row (where `k <= p`) then `index` must have size `n x m x k x q`. It is possible to store the result into an existing Tensor with `result:gather(src, ...)`. 
```lua x = torch.rand(5, 5) > x 0.7259 0.5291 0.4559 0.4367 0.4133 0.0513 0.4404 0.4741 0.0658 0.0653 0.3393 0.1735 0.6439 0.1011 0.7923 0.7606 0.5025 0.5706 0.7193 0.1572 0.1720 0.3546 0.8354 0.8339 0.3025 [torch.DoubleTensor of size 5x5] y = x:gather(1, torch.LongTensor{{1, 2, 3, 4, 5}, {2, 3, 4, 5, 1}}) > y 0.7259 0.4404 0.6439 0.7193 0.3025 0.0513 0.1735 0.5706 0.8339 0.4133 [torch.DoubleTensor of size 2x5] z = x:gather(2, torch.LongTensor{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 1}}) > z 0.7259 0.5291 0.4404 0.4741 0.6439 0.1011 0.7193 0.1572 0.3025 0.1720 [torch.DoubleTensor of size 5x2] ``` ### [Tensor] scatter(dim, index, src|val) ### Writes all values from tensor `src` or the scalar `val` into `self` at the specified indices. The indices are specified with respect to the given dimension, `dim`, in the manner described in [gather](#torch.Tensor.gather). Note that, as for gather, the values of index must be between 1 and `self:size(dim)` inclusive and all values in a row along the specified dimension must be unique. ```lua x = torch.rand(2, 5) > x 0.3227 0.4294 0.8476 0.9414 0.1159 0.7338 0.5185 0.2947 0.0578 0.1273 [torch.DoubleTensor of size 2x5] y = torch.zeros(3, 5):scatter(1, torch.LongTensor{{1, 2, 3, 1, 1}, {3, 1, 1, 2, 3}}, x) > y 0.3227 0.5185 0.2947 0.9414 0.1159 0.0000 0.4294 0.0000 0.0578 0.0000 0.7338 0.0000 0.8476 0.0000 0.1273 [torch.DoubleTensor of size 3x5] z = torch.zeros(2, 4):scatter(2, torch.LongTensor{{3}, {4}}, 1.23) > z 0.0000 0.0000 1.2300 0.0000 0.0000 0.0000 0.0000 1.2300 [torch.DoubleTensor of size 2x4] ``` ### [Tensor] maskedSelect(mask) ### Returns a new Tensor which contains all elements aligned to a `1` in the corresponding `mask`. This `mask` is a `torch.ByteTensor` of zeros and ones. The `mask` and `Tensor` must have the same number of elements. The resulting Tensor will be a 1D tensor of the same type as `Tensor` having size `mask:sum()`. 
```lua x = torch.range(1,12):double():resize(3,4) > x 1 2 3 4 5 6 7 8 9 10 11 12 [torch.DoubleTensor of dimension 3x4] mask = torch.ByteTensor(2,6):bernoulli() > mask 1 0 1 0 0 0 1 1 0 0 0 1 [torch.ByteTensor of dimension 2x6] y = x:maskedSelect(mask) > y 1 3 7 8 12 [torch.DoubleTensor of dimension 5] z = torch.DoubleTensor() z:maskedSelect(x, mask) > z 1 3 7 8 12 ``` Note how the dimensions of the above `x`, `mask` and `y` do not match. Also note how an existing tensor `z` can be used to store the results. ### [Tensor] maskedCopy(mask, tensor) ### Copies the elements of `tensor` into `mask` locations of itself. The masked elements are those elements having a corresponding `1` in the `mask` Tensor. This `mask` is a `torch.ByteTensor` of zeros and ones. The destination `Tensor` and the `mask` Tensor should have the same number of elements. The source `tensor` should have at least as many elements as the number of 1s in the `mask`. ```lua x = torch.Tensor({0, 0, 0, 0}) mask = torch.ByteTensor({0, 1, 0, 1}) y = torch.Tensor({10, 20}) x:maskedCopy(mask,y) print(x) 0 10 0 20 [torch.DoubleTensor of size 4] ``` ```lua x = torch.range(1,4):double():resize(2,2) > x 1 2 3 4 [torch.DoubleTensor of dimension 2x4] mask = torch.ByteTensor(1,8):bernoulli() > mask 0 0 1 1 1 0 1 0 [torch.ByteTensor of dimension 1x8] y = torch.DoubleTensor(2,4):fill(-1) > y -1 -1 -1 -1 -1 -1 -1 -1 [torch.DoubleTensor of dimension 2x4] y:maskedCopy(mask, x) > y -1 -1 1 2 3 -1 4 -1 [torch.DoubleTensor of dimension 2x4] ``` Note how the dimensions of the above `x`, `mask` and `y` do not match, but the number of elements do. ### [Tensor] maskedFill(mask, val) ### Fills the masked elements of itself with value `val`. The masked elements are those elements having a corresponding `1` in the `mask` Tensor. This `mask` is a `torch.ByteTensor` of zeros and ones. The `mask` and `Tensor` must have the same number of elements. 
```lua x = torch.range(1,4):double():resize(1,4) > x 1 2 3 4 [torch.DoubleTensor of dimension 1x4] mask = torch.ByteTensor(2,2):bernoulli() > mask 0 0 1 1 [torch.ByteTensor of dimension 2x2] x:maskedFill(mask, -1) > x 1 2 -1 -1 [torch.DoubleTensor of dimension 1x4] ``` Note how the dimensions of the above `x` and `mask` do not match, but the number of elements do. ## Search ## Each of these methods returns a `LongTensor` corresponding to the indices of the given search operation. ### [LongTensor] nonzero(tensor) ### Finds and returns a `LongTensor` corresponding to the *subscript* indices of all non-zero elements in `tensor`. Note that torch uses the first argument on dispatch to determine the return type. Since the first argument is any `torch.TensorType`, but the return type is always `torch.LongTensor`, the function call `torch.nonzero(torch.LongTensor(), tensor)` does not work. However, `tensor.nonzero(torch.LongTensor(), tensor)` does work. ```lua > x = torch.rand(4, 4):mul(3):floor():int() > x 2 0 2 0 0 0 1 2 0 2 2 1 2 1 2 2 [torch.IntTensor of dimension 4x4] > torch.nonzero(x) 1 1 1 3 2 3 2 4 3 2 3 3 3 4 4 1 4 2 4 3 4 4 [torch.LongTensor of dimension 11x2] > x:nonzero() 1 1 1 3 2 3 2 4 3 2 3 3 3 4 4 1 4 2 4 3 4 4 [torch.LongTensor of dimension 11x2] > indices = torch.LongTensor() > x.nonzero(indices, x) 1 1 1 3 2 3 2 4 3 2 3 3 3 4 4 1 4 2 4 3 4 4 [torch.LongTensor of dimension 11x2] > x:eq(1):nonzero() 2 3 3 4 4 2 [torch.LongTensor of dimension 3x2] ``` ## Expanding/Replicating/Squeezing Tensors ## These methods returns a Tensor which is created by replications of the original tensor. ### [result] expand([result,] sizes) ### `sizes` can either be a `torch.LongStorage` or numbers. Expanding a tensor does not allocate new memory, but only creates a new view on the existing tensor where singleton dimensions can be expanded to multiple ones by setting the `stride` to 0. 
Any dimension that has size 1 can be expanded to arbitrary value without any new memory allocation. Attempting to expand along a dimension that does not have size 1 will result in an error. ```lua x = torch.rand(10,1) > x 0.3837 0.5966 0.0763 0.1896 0.4958 0.6841 0.4038 0.4068 0.1502 0.2239 [torch.DoubleTensor of dimension 10x1] y = torch.expand(x,10,2) > y 0.3837 0.3837 0.5966 0.5966 0.0763 0.0763 0.1896 0.1896 0.4958 0.4958 0.6841 0.6841 0.4038 0.4038 0.4068 0.4068 0.1502 0.1502 0.2239 0.2239 [torch.DoubleTensor of dimension 10x2] y:fill(1) > y 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [torch.DoubleTensor of dimension 10x2] > x 1 1 1 1 1 1 1 1 1 1 [torch.DoubleTensor of dimension 10x1] i=0; y:apply(function() i=i+1;return i end) > y 2 2 4 4 6 6 8 8 10 10 12 12 14 14 16 16 18 18 20 20 [torch.DoubleTensor of dimension 10x2] > x 2 4 6 8 10 12 14 16 18 20 [torch.DoubleTensor of dimension 10x1] ``` ### [result] expandAs([result,] tensor) ### This is equivalent to `self:expand(tensor:size())` ### [Tensor] repeatTensor([result,] sizes) ### `sizes` can either be a `torch.LongStorage` or numbers. Repeating a tensor allocates new memory, unless `result` is provided, in which case its memory is resized. `sizes` specify the number of times the tensor is repeated in each dimension. ```lua x = torch.rand(5) > x 0.7160 0.6514 0.0704 0.7856 0.7452 [torch.DoubleTensor of dimension 5] > torch.repeatTensor(x,3,2) 0.7160 0.6514 0.0704 0.7856 0.7452 0.7160 0.6514 0.0704 0.7856 0.7452 0.7160 0.6514 0.0704 0.7856 0.7452 0.7160 0.6514 0.0704 0.7856 0.7452 0.7160 0.6514 0.0704 0.7856 0.7452 0.7160 0.6514 0.0704 0.7856 0.7452 [torch.DoubleTensor of dimension 3x10] > torch.repeatTensor(x,3,2,1) (1,.,.) = 0.7160 0.6514 0.0704 0.7856 0.7452 0.7160 0.6514 0.0704 0.7856 0.7452 (2,.,.) = 0.7160 0.6514 0.0704 0.7856 0.7452 0.7160 0.6514 0.0704 0.7856 0.7452 (3,.,.) 
= 0.7160 0.6514 0.0704 0.7856 0.7452 0.7160 0.6514 0.0704 0.7856 0.7452 [torch.DoubleTensor of dimension 3x2x5] ``` ### [Tensor] squeeze([dim]) ### Removes all singleton dimensions of the tensor. If `dim` is given, squeezes only that particular dimension of the tensor. ```lua x=torch.rand(2,1,2,1,2) > x (1,1,1,.,.) = 0.6020 0.8897 (2,1,1,.,.) = 0.4713 0.2645 (1,1,2,.,.) = 0.4441 0.9792 (2,1,2,.,.) = 0.5467 0.8648 [torch.DoubleTensor of dimension 2x1x2x1x2] > torch.squeeze(x) (1,.,.) = 0.6020 0.8897 0.4441 0.9792 (2,.,.) = 0.4713 0.2645 0.5467 0.8648 [torch.DoubleTensor of dimension 2x2x2] > torch.squeeze(x,2) (1,1,.,.) = 0.6020 0.8897 (2,1,.,.) = 0.4713 0.2645 (1,2,.,.) = 0.4441 0.9792 (2,2,.,.) = 0.5467 0.8648 [torch.DoubleTensor of dimension 2x2x1x2] ``` ## Manipulating the tensor view ## Each of these methods returns a `Tensor` which is another way of viewing the `Storage` of the given tensor. Hence, any modification in the memory of the sub-tensor will have an impact on the primary tensor, and vice-versa. These methods are very fast, because they do not involve any memory copy. ### [result] view([result,] tensor, sizes) ### Creates a view with different dimensions of the storage associated with `tensor`. If `result` is not passed, then a new tensor is returned, otherwise its storage is made to point to storage of `tensor`. `sizes` can either be a `torch.LongStorage` or numbers. If one of the dimensions is -1, the size of that dimension is inferred from the rest of the elements. ```lua x = torch.zeros(4) > x:view(2,2) 0 0 0 0 [torch.DoubleTensor of dimension 2x2] > x:view(2,-1) 0 0 0 0 [torch.DoubleTensor of dimension 2x2] > x:view(torch.LongStorage{2,2}) 0 0 0 0 [torch.DoubleTensor of dimension 2x2] > x 0 0 0 0 [torch.DoubleTensor of dimension 4] ``` ### [result] viewAs([result,] tensor, template) ### Creates a view with the same dimensions as `template` of the storage associated with `tensor`. 
If `result` is not passed, then a new tensor is returned, otherwise its storage is made to point to storage of `tensor`. ```lua x = torch.zeros(4) y = torch.Tensor(2,2) > x:viewAs(y) 0 0 0 0 [torch.DoubleTensor of dimension 2x2] ``` ### [Tensor] transpose(dim1, dim2) ### Returns a tensor where dimensions `dim1` and `dim2` have been swapped. For 2D tensors, the convenience method of [t()](#torch.Tensor.t) is available. ```lua x = torch.Tensor(3,4):zero() x:select(2,3):fill(7) -- fill column 3 with 7 > x 0 0 7 0 0 0 7 0 0 0 7 0 [torch.DoubleTensor of dimension 3x4] y = x:transpose(1,2) -- swap dimension 1 and 2 > y 0 0 0 0 0 0 7 7 7 0 0 0 [torch.DoubleTensor of dimension 4x3] y:select(2, 3):fill(8) -- fill column 3 with 8 > y 0 0 8 0 0 8 7 7 8 0 0 8 [torch.DoubleTensor of dimension 4x3] > x -- contents of x have changed as well 0 0 7 0 0 0 7 0 8 8 8 8 [torch.DoubleTensor of dimension 3x4] ``` ### [Tensor] t() ### Convenience method of [transpose()](#torch.Tensor.transpose) for 2D tensors. The given tensor must be 2 dimensional. Swap dimensions 1 and 2. ```lua x = torch.Tensor(3,4):zero() x:select(2,3):fill(7) y = x:t() > y 0 0 0 0 0 0 7 7 7 0 0 0 [torch.DoubleTensor of dimension 4x3] > x 0 0 7 0 0 0 7 0 0 0 7 0 [torch.DoubleTensor of dimension 3x4] ``` ### [Tensor] permute(dim1, dim2, ..., dimn) ### Generalizes the function [transpose()](#torch.Tensor.transpose) and can be used as a convenience method replacing a sequence of transpose() calls. Returns a tensor where the dimensions were permuted according to the permutation given by (dim1, dim2, ... , dimn). The permutation must be specified fully, i.e. there must be as many parameters as the tensor has dimensions. 
```lua x = torch.Tensor(3,4,2,5) > x:size() 3 4 2 5 [torch.LongStorage of size 4] y = x:permute(2,3,1,4) -- equivalent to y = x:transpose(1,3):transpose(1,2) > y:size() 4 2 3 5 [torch.LongStorage of size 4] ``` ### [Tensor] unfold(dim, size, step) ### Returns a tensor which contains all slices of size `size` in the dimension `dim`. Step between two slices is given by `step`. If `sizedim` is the original size of dimension `dim`, the size of dimension `dim` in the returned tensor will be `(sizedim - size) / step + 1` An additional dimension of size `size` is appended in the returned tensor. ```lua x = torch.Tensor(7) for i=1,7 do x[i] = i end > x 1 2 3 4 5 6 7 [torch.DoubleTensor of dimension 7] > x:unfold(1, 2, 1) 1 2 2 3 3 4 4 5 5 6 6 7 [torch.DoubleTensor of dimension 6x2] > x:unfold(1, 2, 2) 1 2 3 4 5 6 [torch.DoubleTensor of dimension 3x2] ``` ## Applying a function to a tensor ## These functions apply a function to each element of the tensor on which called the method (self). These methods are much faster than using a `for` loop in `Lua`. The results are stored in `self` (if the function returns something). ### [self] apply(function) ### Apply the given function to all elements of self. The function takes a number (the current element of the tensor) and might return a number, in which case it will be stored in self. Examples: ```lua i = 0 z = torch.Tensor(3,3) z:apply(function(x) i = i + 1 return i end) -- fill up the tensor > z 1 2 3 4 5 6 7 8 9 [torch.DoubleTensor of dimension 3x3] z:apply(math.sin) -- apply the sin function > z 0.8415 0.9093 0.1411 -0.7568 -0.9589 -0.2794 0.6570 0.9894 0.4121 [torch.DoubleTensor of dimension 3x3] sum = 0 z:apply(function(x) sum = sum + x end) -- compute the sum of the elements > sum 1.9552094821074 > z:sum() -- it is indeed correct! 1.9552094821074 ``` ### [self] map(tensor, function(xs, xt)) ### Apply the given function to all elements of self and `tensor`. 
The number of elements of both tensors must match, but sizes do not matter. The function takes two numbers (the current element of self and `tensor`) and might return a number, in which case it will be stored in self. Example: ```lua x = torch.Tensor(3,3) y = torch.Tensor(9) i = 0 x:apply(function() i = i + 1; return i end) -- fill-up x i = 0 y:apply(function() i = i + 1; return i end) -- fill-up y > x 1 2 3 4 5 6 7 8 9 [torch.DoubleTensor of dimension 3x3] > y 1 2 3 4 5 6 7 8 9 [torch.DoubleTensor of dimension 9] x:map(y, function(xx, yy) return xx*yy end) -- element-wise multiplication > x 1 4 9 16 25 36 49 64 81 [torch.DoubleTensor of dimension 3x3] ``` ### [self] map2(tensor1, tensor2, function(x, xt1, xt2)) ### Apply the given function to all elements of self, `tensor1` and `tensor2`. The number of elements of all tensors must match, but sizes do not matter. The function takes three numbers (the current element of self, `tensor1` and `tensor2`) and might return a number, in which case it will be stored in self. Example: ```lua x = torch.Tensor(3,3) y = torch.Tensor(9) z = torch.Tensor(3,3) i = 0; x:apply(function() i = i + 1; return math.cos(i)*math.cos(i) end) i = 0; y:apply(function() i = i + 1; return i end) i = 0; z:apply(function() i = i + 1; return i end) > x 0.2919 0.1732 0.9801 0.4272 0.0805 0.9219 0.5684 0.0212 0.8302 [torch.DoubleTensor of dimension 3x3] > y 1 2 3 4 5 6 7 8 9 [torch.DoubleTensor of dimension 9] > z 1 2 3 4 5 6 7 8 9 [torch.DoubleTensor of dimension 3x3] x:map2(y, z, function(xx, yy, zz) return xx+yy*zz end) > x 1.2919 4.1732 9.9801 16.4272 25.0805 36.9219 49.5684 64.0212 81.8302 [torch.DoubleTensor of dimension 3x3] ``` ## Dividing a tensor into a table of tensors ## These functions divide a Tensor into a table of Tensors. ### [result] split([result,] tensor, size, [dim]) ### Splits Tensor `tensor` along dimension `dim` into a `result` table of Tensors of size `size` (a number) or less (in the case of the last Tensor). 
The sizes of the non-`dim` dimensions remain unchanged. Internally, a series of [narrows](#torch.Tensor.narrow) are performed along dimensions `dim`. Argument `dim` defaults to 1. If `result` is not passed, then a new table is returned, otherwise it is emptied and reused. Example: ```lua x = torch.randn(3,4,5) > x:split(2,1) { 1 : DoubleTensor - size: 2x4x5 2 : DoubleTensor - size: 1x4x5 } > x:split(3,2) { 1 : DoubleTensor - size: 3x3x5 2 : DoubleTensor - size: 3x1x5 } > x:split(2,3) { 1 : DoubleTensor - size: 3x4x2 2 : DoubleTensor - size: 3x4x2 3 : DoubleTensor - size: 3x4x1 } ``` ### [result] chunk([result,] tensor, n, [dim]) ### Splits Tensor `tensor` into `n` chunks of approximately equal size along dimensions `dim` and returns these as a `result` table of Tensors. Argument `dim` defaults to 1. This function uses [split](#torch.split) internally: ```lua torch.split(result, tensor, math.ceil(tensor:size(dim)/n), dim) ``` Example: ```lua x = torch.randn(3,4,5) > x:chunk(2,1) { 1 : DoubleTensor - size: 2x4x5 2 : DoubleTensor - size: 1x4x5 } > x:chunk(2,2) { 1 : DoubleTensor - size: 3x2x5 2 : DoubleTensor - size: 3x2x5 } > x:chunk(2,3) { 1 : DoubleTensor - size: 3x4x3 2 : DoubleTensor - size: 3x4x2 } ``` ## LuaJIT FFI access ## These functions expose Torch's Tensor and Storage data structures, through [LuaJIT FFI](http://luajit.org/ext_ffi_api.html). This allows extremely fast access to Tensors and Storages, all from Lua. ### [result] data(tensor, [asnumber]) ### Returns a LuaJIT FFI pointer to the raw data of the tensor. If `asnumber` is true, then returns the pointer as a `intptr_t` cdata that you can transform to a plain lua number with `tonumber()`. Accessing the raw data of a Tensor like this is extremely efficient, in fact, it's almost as fast as C in lots of cases. 
Example: ```lua t = torch.randn(3,2) > t 0.8008 -0.6103 0.6473 -0.1870 -0.0023 -0.4902 [torch.DoubleTensor of dimension 3x2] t_data = torch.data(t) for i = 0,t:nElement()-1 do t_data[i] = 0 end > t 0 0 0 0 0 0 [torch.DoubleTensor of dimension 3x2] ``` WARNING: bear in mind that accessing the raw data like this is dangerous, and should only be done on contiguous tensors (if a tensor is not contiguous, then you have to use its size and stride information). Making sure a tensor is contiguous is easy: ```lua t = torch.randn(3,2) t_noncontiguous = t:transpose(1,2) -- it would be unsafe to work with torch.data(t_noncontiguous) t_transposed_and_contiguous = t_noncontiguous:contiguous() -- it is now safe to work with the raw pointer data = torch.data(t_transposed_and_contiguous) ``` Last, the pointer can be returned as a plain `intptr_t` cdata. This can be useful to share pointers between threads (warning: this is dangerous, as the second tensor doesn't increment the reference counter on the storage. If the first tensor gets freed, then the data of the second tensor becomes a dangling pointer): ```lua t = torch.randn(10) p = tonumber(torch.data(t,true)) s = torch.Storage(10, p) tt = torch.Tensor(s) -- tt and t are a view on the same data. ``` ### [result] cdata(tensor, [asnumber]) ### Returns a LuaJIT FFI pointer to the C structure of the tensor. Use this with caution, and look at [FFI.lua](https://github.com/torch/torch7/blob/master/FFI.lua) for the members of the tensor. ## Reference counting ## Tensors are reference-counted. It means that each time an object (C or the Lua state) needs to keep a reference over a tensor, the corresponding tensor reference counter will be [increased](#torch.Tensor.retain). The reference counter is [decreased](#torch.Tensor.free) when the object does not need the tensor anymore. These methods should be used with extreme care. 
In general, they should never be called, except if you know what you are doing, as the handling of references is done automatically. They can be useful in threaded environments. Note that these methods are atomic operations. ### retain() ### Increment the reference counter of the tensor. ### free() ### Decrement the reference counter of the tensor. Free the tensor if the counter is at 0. doc/tester.md000066400000000000000000000313061316246254300134640ustar00rootroot00000000000000 # Tester # This class provides a generic unit testing framework. It is already being used in [nn](../index.md) package to verify the correctness of classes. The framework is generally used as follows. ```lua local mytest = torch.TestSuite() local tester = torch.Tester() function mytest.testA() local a = torch.Tensor{1, 2, 3} local b = torch.Tensor{1, 2, 4} tester:eq(a, b, "a and b should be equal") end function mytest.testB() local a = {2, torch.Tensor{1, 2, 2}} local b = {2, torch.Tensor{1, 2, 2.001}} tester:eq(a, b, 0.01, "a and b should be approximately equal") end function mytest.testC() local function myfunc() return "hello " .. world end tester:assertNoError(myfunc, "myfunc shouldn't give an error") end tester:add(mytest) tester:run() ``` Running this code will report two test failures (and one test success). Generally it is better to put a single test case in each test function unless several very related test cases exist. The error report includes the message and line number of the error. ``` Running 3 tests 1/3 testB ............................................................... [PASS] 2/3 testA ............................................................... [FAIL] 3/3 testC ............................................................... 
[FAIL] Completed 3 asserts in 3 tests with 2 failures and 0 errors -------------------------------------------------------------------------------- testA a and b should be equal TensorEQ(==) violation: max diff=1, tolerance=0 stack traceback: ./test.lua:8: in function <./test.lua:5> -------------------------------------------------------------------------------- testC myfunc shouldn't give an error ERROR violation: err=./test.lua:19: attempt to concatenate global 'world' (a nil value) stack traceback: ./test.lua:21: in function <./test.lua:17> -------------------------------------------------------------------------------- torch/torch/Tester.lua:383: An error was found while running tests! stack traceback: [C]: in function 'assert' torch/torch/Tester.lua:383: in function 'run' ./test.lua:25: in main chunk ``` Historically, Tester has supported a variety of equality checks ([asserteq](#torch.Tester.asserteq), [assertalmosteq](#torch.Tester.assertalmosteq), [assertTensorEq](#torch.Tester.assertTensorEq), [assertTableEq](#torch.Tester.assertTableEq), and their negations). In general however, you should just use [eq](#torch.Tester.eq) (or its negation [ne](#torch.Tester.ne)). These functions do deep checking of many object types including recursive tables and tensors, and support a tolerance parameter for comparing numerical values (including tensors). Many of the tester functions accept both an optional `tolerance` parameter and a `message` to display if the test case fails. For both convenience and backwards compatibility, these arguments can be supplied in either order. ### torch.Tester() ### Returns a new instance of `torch.Tester` class. ### add(f, 'name') ### Adds `f`, either a test function or a table of test functions, to the tester. If `f` is a function then names should be unique. 
There are a couple of special values for `name`: if it is `_setUp` or `_tearDown`, then the function will be called either *before* or *after* every test respectively, with the name of the test passed as a parameter. If `f` is a table then `name` should be nil, and the names of the individual tests in the table will be taken from the corresponding table key. It's recommended you use [TestSuite](#torch.TestSuite.dok) for tables of tests. Returns the torch.Tester instance. ### run(testNames) ### Runs tests that have been added by [add(f, 'name')](#torch.Tester.add). While running it reports progress, and at the end gives a summary of all errors. If a list of names `testNames` is passed, then all tests matching these names (using `string.match`) will be run; otherwise all tests will be run. ```lua tester:run() -- runs all tests tester:run("test1") -- runs the test named "test1" tester:run({"test2", "test3"}) -- runs the tests named "test2" and "test3" ``` ### disable(testNames) ### Prevents the given tests from running, where `testNames` can be a single string or list of strings. More precisely, when [run](#torch.Tester.run) is invoked, it will skip these tests, while still printing out an indication of skipped tests. This is useful for temporarily disabling tests without commenting out the code (for example, if they depend on upstream code that is currently broken), and explicitly flagging them as skipped. Returns the torch.Tester instance. ```lua local tester = torch.Tester() local tests = torch.TestSuite() function tests.brokenTest() -- ... end tester:add(tests):disable('brokenTest'):run() ``` ``` Running 1 test 1/1 brokenTest .......................................................... [SKIP] Completed 0 asserts in 1 test with 0 failures and 0 errors and 1 disabled ``` ### assert(condition [, message]) ### Checks that `condition` is true (using the optional `message` if the test fails). Returns whether the test passed. 
### assertGeneralEq(got, expected [, tolerance] [, message]) ### General equality check between numbers, tables, strings, `torch.Tensor` objects, `torch.Storage` objects, etc. Checks that `got` and `expected` have the same contents, where tables are compared recursively, tensors and storages are compared elementwise, and numbers are compared within `tolerance` (default value `0`). Other types are compared by strict equality. The optional `message` is used if the test fails. Returns whether the test passed. ### eq(got, expected [, tolerance] [, message]) ### Convenience function; does the same as [assertGeneralEq](#torch.Tester.assertGeneralEq). ### assertGeneralNe(got, unexpected [, tolerance] [, message]) ### General inequality check between numbers, tables, strings, `torch.Tensor` objects, `torch.Storage` objects, etc. Checks that `got` and `unexpected` have different contents, where tables are compared recursively, tensors and storages are compared elementwise, and numbers are compared within `tolerance` (default value `0`). Other types are compared by strict equality. The optional `message` is used if the test fails. Returns whether the test passed. ### ne(got, unexpected [, tolerance] [, message]) ### Convenience function; does the same as [assertGeneralNe](#torch.Tester.assertGeneralNe). ### assertlt(a, b [, message]) ### Checks that `a < b` (using the optional `message` if the test fails), where `a` and `b` are numbers. Returns whether the test passed. ### assertgt(a, b [, message]) ### Checks that `a > b` (using the optional `message` if the test fails), where `a` and `b` are numbers. Returns whether the test passed. ### assertle(a, b [, message]) ### Checks that `a <= b` (using the optional `message` if the test fails), where `a` and `b` are numbers. Returns whether the test passed. ### assertge(a, b [, message]) ### Checks that `a >= b` (using the optional `message` if the test fails), where `a` and `b` are numbers. Returns whether the test passed. 
### asserteq(a, b [, message]) ### Checks that `a == b` (using the optional `message` if the test fails). Note that this uses the generic lua equality check, so objects such as tensors that have the same content but are distinct objects will fail this test; consider using [assertGeneralEq()](#torch.Tester.assertGeneralEq) instead. Returns whether the test passed. ### assertne(a, b [, message]) ### Checks that `a ~= b` (using the optional `message` if the test fails). Note that this uses the generic lua inequality check, so objects such as tensors that have the same content but are distinct objects will pass this test; consider using [assertGeneralNe()](#torch.Tester.assertGeneralNe) instead. Returns whether the test passed. ### assertalmosteq(a, b [, tolerance] [, message]) ### Checks that `|a - b| <= tolerance` (using the optional `message` if the test fails), where `a` and `b` are numbers, and `tolerance` is an optional number (default `1e-16`). Returns whether the test passed. ### assertTensorEq(ta, tb [, tolerance] [, message]) ### Checks that `max(abs(ta - tb)) <= tolerance` (using the optional `message` if the test fails), where `ta` and `tb` are tensors, and `tolerance` is an optional number (default `1e-16`). Tensors that are different types or sizes will cause this check to fail. Returns whether the test passed. ### assertTensorNe(ta, tb [, tolerance] [, message]) ### Checks that `max(abs(ta - tb)) > tolerance` (using the optional `message` if the test fails), where `ta` and `tb` are tensors, and `tolerance` is an optional number (default `1e-16`). Tensors that are different types or sizes will cause this check to pass. Returns whether the test passed. ### assertTableEq(ta, tb [, tolerance] [, message]) ### Checks that the two tables have the same contents, comparing them recursively, where objects such as tensors are compared using their contents. 
Numbers (such as those appearing in tensors) are considered equal if their difference is at most the given tolerance. ### assertTableNe(ta, tb [, tolerance] [, message]) ### Checks that the two tables have distinct contents, comparing them recursively, where objects such as tensors are compared using their contents. Numbers (such as those appearing in tensors) are considered equal if their difference is at most the given tolerance. ### assertError(f [, message]) ### Checks that calling `f()` (via `pcall`) raises an error (using the optional `message` if the test fails). Returns whether the test passed. ### assertNoError(f [, message]) ### Check that calling `f()` (via `pcall`) does not raise an error (using the optional `message` if the test fails). Returns whether the test passed. ### assertErrorMsg(f, errmsg [, message]) ### Checks that calling `f()` (via `pcall`) raises an error with the specific error message `errmsg` (using the optional `message` if the test fails). Returns whether the test passed. ### assertErrorPattern(f, errPattern [, message]) ### Checks that calling `f()` (via `pcall`) raises an error matching `errPattern` (using the optional `message` if the test fails). The matching is done using `string.find`; in particular substrings will match. Returns whether the test passed. ### assertErrorObj(f, errcomp [, message]) ### Checks that calling `f()` (via `pcall`) raises an error object `err` such that calling `errcomp(err)` returns true (using the optional `message` if the test fails). Returns whether the test passed. ### setEarlyAbort(earlyAbort) ### If `earlyAbort == true` then the testing will stop on the first test failure. By default this is off. ### setRethrowErrors(rethrowErrors) ### If `rethrowErrors == true` then lua errors encountered during the execution of the tests will be rethrown, instead of being caught by the tester. By default this is off. 
### setSummaryOnly(summaryOnly) ### If `summaryOnly == true`, then only the pass / fail status of the tests will be printed out, rather than full error messages. By default, this is off. # TestSuite # A TestSuite is used in conjunction with [Tester](#torch.Tester.dok). It is created via `torch.TestSuite()`, and behaves like a plain lua table, except that it also checks that duplicate tests are not created. It is recommended that you always use a TestSuite instead of a plain table for your tests. The following example code attempts to add a function with the same name twice to a TestSuite (a surprisingly common mistake), which gives an error. ```lua > test = torch.TestSuite() > > function test.myTest() > -- ... > end > > -- ... > > function test.myTest() > -- ... > end torch/TestSuite.lua:16: Test myTest is already defined. ``` doc/timer.md000066400000000000000000000026661316246254300133050ustar00rootroot00000000000000 # Timer # This class is able to measure time (in seconds) elapsed in a particular period. Example: ```lua timer = torch.Timer() -- the Timer starts to count now x = 0 for i=1,1000000 do x = x + math.sin(x) end print('Time elapsed for 1,000,000 sin: ' .. timer:time().real .. ' seconds') ``` ## Timer Class Constructor and Methods ## ### torch.Timer() ### Returns a new `Timer`. The timer starts to count the time now. ### [self] reset() ### Resets the timer accumulated time to `0`. If the timer was running, the timer restarts to count the time now. If the timer was stopped, it stays stopped. ### [self] resume() ### Resumes a stopped timer. The timer restarts to count the time, and adds it to the accumulated time already counted before being stopped. ### [self] stop() ### Stops the timer. The accumulated time counted until now is stored. ### [table] time() ### Returns a table reporting the accumulated time elapsed until now. Following the UNIX shell `time` command, there are three fields in the table: * `real`: the wall-clock elapsed time. 
* `user`: the elapsed CPU time. Note that the CPU time of a threaded program sums time spent in all threads. * `sys`: the time spent in system usage. doc/utility.md000066400000000000000000000240731316246254300136640ustar00rootroot00000000000000 # Torch utility functions # These functions are used in all Torch packages for creating and handling classes. The most interesting function is probably [`torch.class()`](#torch.class) which allows the user to easily create new classes. [`torch.typename()`](#torch.typename) might also be interesting to check what the class of a given *Torch7* object is. The other functions are for more advanced users. ### [metatable] torch.class(name, [parentName], [module]) ### Creates a new `Torch` class called `name`. If `parentName` is provided, the class will inherit `parentName` methods. A class is a table which has a particular metatable. If `module` is not provided and if `name` is of the form `package.className` then the class `className` will be added to the specified `package`. In that case, `package` has to be a valid (and already loaded) package. If `name` does not contain any `.`, then the class will be defined in the global environment. If a `module` table is provided, the class will be defined in this table at key `className`. One \[or two\] (meta)tables are returned. These tables contain all the methods provided by the class [and its parent class if it has been provided]. After a call to `torch.class()` you have to properly fill up the metatable. After the class definition is complete, constructing a new class `name` will be achieved by a call to `name()`. This call will first call the method `__init()` if it exists, passing all arguments of `name()`. 
```lua -- for naming convenience do --- creates a class "Foo" local Foo = torch.class('Foo') --- the initializer function Foo:__init() self.contents = 'this is some text' end --- a method function Foo:print() print(self.contents) end --- another one function Foo:bip() print('bip') end end --- now create an instance of Foo foo = Foo() --- try it out foo:print() --- create a class torch.Bar which --- inherits from Foo do local Bar, parent = torch.class('torch.Bar', 'Foo') --- the initializer function Bar:__init(stuff) --- call the parent initializer on ourself parent.__init(self) --- do some stuff self.stuff = stuff end --- a new method function Bar:boing() print('boing!') end --- override parent's method function Bar:print() print(self.contents) print(self.stuff) end end --- create a new instance and use it bar = torch.Bar('ha ha!') bar:print() -- overridden method bar:boing() -- child method bar:bip() -- parent's method ``` For advanced users, it is worth mentioning that `torch.class()` actually calls [`torch.newmetatable()`](#torch.newmetatable) with a particular constructor. The constructor creates a Lua table and sets the right metatable on it, and then calls `__init()` if it exists in the metatable. It also sets a [factory](#torch.factory) field `__factory` such that it is possible to create an empty object of this class. ### [string] torch.type(object) ### Checks if `object` has a metatable. If it does, and if it corresponds to a `Torch` class, then returns a string containing the name of the class. Otherwise, it returns the Lua `type(object)` of the object. Unlike [`torch.typename()`](#torch.typename), all outputs are strings: ```lua > torch.type(torch.Tensor()) torch.DoubleTensor > torch.type({}) table > torch.type(7) number ``` ### [string] torch.typename(object) ### Checks if `object` has a metatable. If it does, and if it corresponds to a `Torch` class, then returns a string containing the name of the class. Returns `nil` in any other case. 
```lua > torch.typename(torch.Tensor()) torch.DoubleTensor > torch.typename({}) > torch.typename(7) ``` A Torch class is a class created with [`torch.class()`](#torch.class) or [`torch.newmetatable()`](#torch.newmetatable). ### [userdata] torch.typename2id(string) ### Given a Torch class name specified by `string`, returns a unique corresponding id (defined by a `lightuserdata` pointing on the internal structure of the class). This might be useful to do a *fast* check of the class of an object (if used with [`torch.id()`](#torch.id)), avoiding string comparisons. Returns `nil` if `string` does not specify a Torch object. ### [userdata] torch.id(object) ### Returns a unique id corresponding to the `class` of the given *Torch7* object. The id is defined by a `lightuserdata` pointing on the internal structure of the class. Returns `nil` if `object` is not a Torch object. This is different from the `object` id returned by [`torch.pointer()`](#torch.pointer). ### [boolean] isTypeOf(object, typeSpec) ### Checks if a given `object` is an instance of the type specified by `typeSpec`. `typeSpec` can be a string (including a `string.find` pattern) or the constructor object for a Torch class. This function traverses up the class hierarchy, so if b is an instance of B which is a subclass of A, then `torch.isTypeOf(b, B)` and `torch.isTypeOf(b, A)` will both return `true`. ### [table] torch.newmetatable(name, parentName, constructor) ### Register a new metatable as a Torch type with the given string `name`. The new metatable is returned. If the string `parentName` is not `nil` and is a valid Torch type (previously created by `torch.newmetatable()`) then set the corresponding metatable as a metatable to the returned new metatable. If the given `constructor` function is not `nil`, then assign to the variable `name` the given constructor. The given `name` might be of the form `package.className`, in which case the `className` will be local to the specified `package`. 
In that case, `package` must be a valid and already loaded package. ### [function] torch.factory(name) ### Returns the factory function of the Torch class `name`. If the class name is invalid or if the class has no factory, then returns `nil`. A Torch class is a class created with [`torch.class()`](#torch.class) or [`torch.newmetatable()`](#torch.newmetatable). A factory function is able to return a new (empty) object of its corresponding class. This is helpful for [object serialization](file.md#torch.File.serialization). ### [table] torch.getmetatable(string) ### Given a `string`, returns a metatable corresponding to the Torch class described by `string`. Returns `nil` if the class does not exist. A Torch class is a class created with [`torch.class()`](#torch.class) or [`torch.newmetatable()`](#torch.newmetatable). Example: ```lua > for k, v in pairs(torch.getmetatable('torch.CharStorage')) do print(k, v) end __index__ function: 0x1a4ba80 __typename torch.CharStorage write function: 0x1a49cc0 __tostring__ function: 0x1a586e0 __newindex__ function: 0x1a4ba40 string function: 0x1a4d860 __version 1 read function: 0x1a4d840 copy function: 0x1a49c80 __len__ function: 0x1a37440 fill function: 0x1a375c0 resize function: 0x1a37580 __index table: 0x1a4a080 size function: 0x1a4ba20 ``` ### [boolean] torch.isequal(object1, object2) ### If the two objects given as arguments are *Lua* tables (or *Torch7* objects), then returns `true` if and only if the tables (or Torch objects) have the same address in memory. Returns `false` in any other cases. A Torch class is a class created with [`torch.class()`](#TorchClass) or [`torch.newmetatable()`](#torch.newmetatable). ### [string] torch.getdefaulttensortype() ### Returns a string representing the default tensor type currently in use by *Torch7*. ### [table] torch.getenv(function or userdata) ### Returns the Lua `table` environment of the given `function` or the given `userdata`. 
To know more about environments, please read the documentation of [`lua_setfenv()`](http://www.lua.org/manual/5.1/manual.html#lua_setfenv) and [`lua_getfenv()`](http://www.lua.org/manual/5.1/manual.html#lua_getfenv). ### [number] torch.version(object) ### Returns the field ```lua__version``` of a given object. This might be helpful to handle variations in a class over time. ### [number] torch.pointer(object) ### Returns a unique id (pointer) of the given `object`, which can be a *Torch7* object, a table, a thread or a function. This is different from the `class` id returned by [`torch.id()`](#torch.id). ### torch.setdefaulttensortype([typename]) ### Sets the default tensor type for all the tensors allocated from this point on. Valid types are: * `torch.ByteTensor` * `torch.CharTensor` * `torch.ShortTensor` * `torch.IntTensor` * `torch.FloatTensor` * `torch.DoubleTensor` ### torch.setenv(function or userdata, table) ### Assign `table` as the Lua environment of the given `function` or the given `userdata`. To know more about environments, please read the documentation of [`lua_setfenv()`](http://www.lua.org/manual/5.1/manual.html#lua_setfenv) and [`lua_getfenv()`](http://www.lua.org/manual/5.1/manual.html#lua_getfenv). ### [object] torch.setmetatable(table, classname) ### Set the metatable of the given `table` to the metatable of the Torch object named `classname`. This function has to be used with a lot of care. ### [table] torch.getconstructortable(string) ### BUGGY Return the constructor table of the Torch class specified by `string`. ### [table] torch.totable(object) ### Converts a Tensor or a Storage to a lua table. Also available as methods: `tensor:totable()` and `storage:totable()`. Multidimensional Tensors are converted to a set of nested tables, matching the shape of the source Tensor. 
```lua > print(torch.totable(torch.Tensor({1, 2, 3}))) { 1 : 1 2 : 2 3 : 3 } ``` general.h000066400000000000000000000012601316246254300126510ustar00rootroot00000000000000#ifndef TORCH_GENERAL_INC #define TORCH_GENERAL_INC #include #include #include #include "luaT.h" #include "TH.h" #if (defined(_MSC_VER) || defined(__MINGW32__)) #define snprintf _snprintf #define popen _popen #define pclose _pclose #endif #if LUA_VERSION_NUM >= 503 /* one can simply enable LUA_COMPAT_5_2 to be backward compatible. However, this does not work when we are trying to use system-installed lua, hence these redefines */ #define luaL_optlong(L,n,d) ((long)luaL_optinteger(L, (n), (d))) #define luaL_checklong(L,n) ((long)luaL_checkinteger(L, (n))) #define luaL_checkint(L,n) ((int)luaL_checkinteger(L, (n))) #endif #endif generic/000077500000000000000000000000001316246254300125005ustar00rootroot00000000000000generic/Storage.c000066400000000000000000000200551316246254300142520ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/Storage.c" #else #include "luaG.h" static int torch_Storage_(new)(lua_State *L) { int index = 1; THStorage *storage; THAllocator *allocator = luaT_toudata(L, index, "torch.Allocator"); if (allocator) index++; if(lua_type(L, index) == LUA_TSTRING) { if (allocator) THError("Passing allocator not supported when using file mapping"); const char *fileName = luaL_checkstring(L, index); int isShared = 0; if(luaT_optboolean(L, index + 1, 0)) isShared = TH_ALLOCATOR_MAPPED_SHARED; ptrdiff_t size = luaL_optinteger(L, index + 2, 0); if (isShared && luaT_optboolean(L, index + 3, 0)) isShared = TH_ALLOCATOR_MAPPED_SHAREDMEM; storage = THStorage_(newWithMapping)(fileName, size, isShared); } else if(lua_type(L, index) == LUA_TTABLE) { ptrdiff_t size = lua_objlen(L, index); ptrdiff_t i; if (allocator) storage = THStorage_(newWithAllocator)(size, allocator, NULL); else storage = THStorage_(newWithSize)(size); for(i = 1; i <= size; i++) { lua_rawgeti(L, 
index, i); if(!lua_isnumber(L, -1)) { THStorage_(free)(storage); luaL_error(L, "element at index %d is not a number", i); } THStorage_(set)(storage, i-1, LUA_NUMBER_TO_REAL(lua_tonumber(L, -1))); lua_pop(L, 1); } } else if(lua_type(L, index) == LUA_TUSERDATA) { if (allocator) THError("Passing allocator not supported when using storage views"); THStorage *src = luaT_checkudata(L, index, torch_Storage); real *ptr = src->data; ptrdiff_t offset = luaL_optinteger(L, index + 1, 1) - 1; if (offset < 0 || offset >= src->size) { luaL_error(L, "offset out of bounds"); } ptrdiff_t size = luaL_optinteger(L, index + 2, src->size - offset); if (size < 1 || size > (src->size - offset)) { luaL_error(L, "size out of bounds"); } storage = THStorage_(newWithData)(ptr + offset, size); storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_VIEW; storage->view = src; THStorage_(retain)(storage->view); } else if(lua_type(L, index + 1) == LUA_TNUMBER) { ptrdiff_t size = luaL_optinteger(L, index, 0); real *ptr = (real *)luaL_optinteger(L, index + 1, 0); if (allocator) storage = THStorage_(newWithDataAndAllocator)(ptr, size, allocator, NULL); else storage = THStorage_(newWithData)(ptr, size); storage->flag = TH_STORAGE_REFCOUNTED; } else { ptrdiff_t size = luaL_optinteger(L, index, 0); if (allocator) storage = THStorage_(newWithAllocator)(size, allocator, NULL); else storage = THStorage_(newWithSize)(size); } luaT_pushudata(L, storage, torch_Storage); return 1; } static int torch_Storage_(retain)(lua_State *L) { THStorage *storage = luaT_checkudata(L, 1, torch_Storage); THStorage_(retain)(storage); return 0; } static int torch_Storage_(free)(lua_State *L) { THStorage *storage = luaT_checkudata(L, 1, torch_Storage); THStorage_(free)(storage); return 0; } static int torch_Storage_(resize)(lua_State *L) { THStorage *storage = luaT_checkudata(L, 1, torch_Storage); ptrdiff_t size = luaL_checkinteger(L, 2); /* int keepContent = luaT_optboolean(L, 3, 0); */ THStorage_(resize)(storage, size);/*, 
keepContent); */ lua_settop(L, 1); return 1; } static int torch_Storage_(copy)(lua_State *L) { THStorage *storage = luaT_checkudata(L, 1, torch_Storage); void *src; if( (src = luaT_toudata(L, 2, torch_Storage)) ) THStorage_(copy)(storage, src); else if( (src = luaT_toudata(L, 2, "torch.ByteStorage")) ) THStorage_(copyByte)(storage, src); else if( (src = luaT_toudata(L, 2, "torch.CharStorage")) ) THStorage_(copyChar)(storage, src); else if( (src = luaT_toudata(L, 2, "torch.ShortStorage")) ) THStorage_(copyShort)(storage, src); else if( (src = luaT_toudata(L, 2, "torch.IntStorage")) ) THStorage_(copyInt)(storage, src); else if( (src = luaT_toudata(L, 2, "torch.LongStorage")) ) THStorage_(copyLong)(storage, src); else if( (src = luaT_toudata(L, 2, "torch.FloatStorage")) ) THStorage_(copyFloat)(storage, src); else if( (src = luaT_toudata(L, 2, "torch.DoubleStorage")) ) THStorage_(copyDouble)(storage, src); else if( (src = luaT_toudata(L, 2, "torch.HalfStorage")) ) THStorage_(copyHalf)(storage, src); else luaL_typerror(L, 2, "torch.*Storage"); lua_settop(L, 1); return 1; } static int torch_Storage_(fill)(lua_State *L) { THStorage *storage = luaT_checkudata(L, 1, torch_Storage); real value = luaG_(checkreal)(L, 2); THStorage_(fill)(storage, value); lua_settop(L, 1); return 1; } static int torch_Storage_(elementSize)(lua_State *L) { luaT_pushinteger(L, THStorage_(elementSize)()); return 1; } static int torch_Storage_(__len__)(lua_State *L) { THStorage *storage = luaT_checkudata(L, 1, torch_Storage); luaT_pushinteger(L, storage->size); return 1; } static int torch_Storage_(__newindex__)(lua_State *L) { if(lua_isnumber(L, 2)) { THStorage *storage = luaT_checkudata(L, 1, torch_Storage); ptrdiff_t index = luaL_checkinteger(L, 2) - 1; real number = luaG_(checkreal)(L, 3); THStorage_(set)(storage, index, number); lua_pushboolean(L, 1); } else lua_pushboolean(L, 0); return 1; } static int torch_Storage_(__index__)(lua_State *L) { if(lua_isnumber(L, 2)) { THStorage *storage = 
luaT_checkudata(L, 1, torch_Storage); ptrdiff_t index = luaL_checkinteger(L, 2) - 1; luaG_(pushreal)(L, THStorage_(get)(storage, index)); lua_pushboolean(L, 1); return 2; } else { lua_pushboolean(L, 0); return 1; } } #if defined(TH_REAL_IS_CHAR) || defined(TH_REAL_IS_BYTE) static int torch_Storage_(string)(lua_State *L) { THStorage *storage = luaT_checkudata(L, 1, torch_Storage); if(lua_isstring(L, -1)) { size_t len = 0; const char *str = lua_tolstring(L, -1, &len); THStorage_(resize)(storage, len); memmove(storage->data, str, len); lua_settop(L, 1); } else lua_pushlstring(L, (char*)storage->data, storage->size); return 1; /* either storage or string */ } #endif static int torch_Storage_(totable)(lua_State *L) { THStorage *storage = luaT_checkudata(L, 1, torch_Storage); ptrdiff_t i; lua_newtable(L); for(i = 0; i < storage->size; i++) { luaG_(pushreal)(L, storage->data[i]); lua_rawseti(L, -2, i+1); } return 1; } static int torch_Storage_(factory)(lua_State *L) { THStorage *storage = THStorage_(new)(); luaT_pushudata(L, storage, torch_Storage); return 1; } static int torch_Storage_(write)(lua_State *L) { THStorage *storage = luaT_checkudata(L, 1, torch_Storage); THFile *file = luaT_checkudata(L, 2, "torch.File"); #ifdef DEBUG THAssert(storage->size < LONG_MAX); #endif THFile_writeLongScalar(file, storage->size); THFile_writeRealRaw(file, storage->data, storage->size); return 0; } static int torch_Storage_(read)(lua_State *L) { THStorage *storage = luaT_checkudata(L, 1, torch_Storage); THFile *file = luaT_checkudata(L, 2, "torch.File"); ptrdiff_t size = THFile_readLongScalar(file); THStorage_(resize)(storage, size); THFile_readRealRaw(file, storage->data, storage->size); return 0; } static const struct luaL_Reg torch_Storage_(_) [] = { {"retain", torch_Storage_(retain)}, {"free", torch_Storage_(free)}, {"size", torch_Storage_(__len__)}, {"elementSize", torch_Storage_(elementSize)}, {"__len__", torch_Storage_(__len__)}, {"__newindex__", torch_Storage_(__newindex__)}, 
{"__index__", torch_Storage_(__index__)}, {"resize", torch_Storage_(resize)}, {"fill", torch_Storage_(fill)}, {"copy", torch_Storage_(copy)}, {"totable", torch_Storage_(totable)}, {"write", torch_Storage_(write)}, {"read", torch_Storage_(read)}, #if defined(TH_REAL_IS_CHAR) || defined(TH_REAL_IS_BYTE) {"string", torch_Storage_(string)}, #endif {NULL, NULL} }; void torch_Storage_(init)(lua_State *L) { luaT_newmetatable(L, torch_Storage, NULL, torch_Storage_(new), torch_Storage_(free), torch_Storage_(factory)); luaT_setfuncs(L, torch_Storage_(_), 0); lua_pop(L, 1); } #endif generic/Tensor.c000066400000000000000000001155751316246254300141340ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/Tensor.c" #else #include "luaG.h" static void torch_Tensor_(c_readTensorStorageSizeStride)(lua_State *L, int index, int allowNone, int allowTensor, int allowStorage, int allowStride, THStorage **storage_, ptrdiff_t *storageOffset_, THLongStorage **size_, THLongStorage **stride_); static void torch_Tensor_(c_readSizeStride)(lua_State *L, int index, int allowStride, THLongStorage **size_, THLongStorage **stride_); static int torch_Tensor_(size)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); if(lua_isnumber(L,2)) { int dim = luaL_checkint(L, 2)-1; THArgCheck(dim >= 0 && dim < tensor->nDimension, 2, "dimension %d out of range of %dD tensor", dim+1, THTensor_(nDimension)(tensor)); luaT_pushlong(L, tensor->size[dim]); } else { THLongStorage *size = THTensor_(newSizeOf)(tensor); luaT_pushudata(L, size, "torch.LongStorage"); } return 1; } static int torch_Tensor_(elementSize)(lua_State *L) { luaT_pushinteger(L, THStorage_(elementSize)()); return 1; } static int torch_Tensor_(stride)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); if(lua_isnumber(L,2)) { int dim = luaL_checkint(L, 2)-1; THArgCheck(dim >= 0 && dim < tensor->nDimension, 2, "dimension %d out of range of %dD tensor", dim+1, 
THTensor_(nDimension)(tensor)); luaT_pushlong(L, tensor->stride[dim]); } else { THLongStorage *storage = THLongStorage_newWithSize(tensor->nDimension); memmove(storage->data, tensor->stride, sizeof(long)*tensor->nDimension); luaT_pushudata(L, storage, "torch.LongStorage"); } return 1; } static int torch_Tensor_(nDimension)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); luaT_pushinteger(L, tensor->nDimension); return 1; } static int torch_Tensor_(storage)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); if(tensor->storage) { THStorage_(retain)(tensor->storage); luaT_pushudata(L, tensor->storage, torch_Storage); } else lua_pushnil(L); return 1; } static int torch_Tensor_(storageOffset)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); luaT_pushinteger(L, tensor->storageOffset+1); return 1; } static int torch_Tensor_(new)(lua_State *L) { THTensor *tensor; ptrdiff_t storageOffset; THLongStorage *size, *stride; if(lua_type(L, 1) == LUA_TTABLE) { ptrdiff_t i, j; THLongStorage *counter; ptrdiff_t si = 0; int dimension = 0; int is_finished = 0; lua_settop(L, 1); size = THLongStorage_new(); while( (lua_type(L, -1) == LUA_TTABLE) && (lua_objlen(L, -1) > 0) ) { THLongStorage_resize(size, dimension+1); size->data[dimension] = lua_objlen(L, -1); dimension++; lua_rawgeti(L, -1, 1); } lua_pop(L, 1); counter = THLongStorage_newWithSize(size->size); THLongStorage_fill(counter, 0); tensor = THTensor_(newWithSize)(size, NULL); if(size->size == 0) is_finished = 1; while(!is_finished) { if(!lua_istable(L, -1)) { THLongStorage_free(size); THLongStorage_free(counter); THTensor_(free)(tensor); THError("invalid tensor definition"); } if(lua_objlen(L, -1) != size->data[size->size-1]) { THLongStorage_free(size); THLongStorage_free(counter); THTensor_(free)(tensor); THError("invalid tensor sizes"); } for(i = 0; i < size->data[size->size-1]; i++) { lua_rawgeti(L, -1, i+1); if(!lua_isnumber(L, -1)) { THLongStorage_free(size); 
THLongStorage_free(counter); THTensor_(free)(tensor); THError("invalid element (not a number)"); } THStorage_(set)(THTensor_(storage)(tensor), si++, luaG_(checkreal)(L, -1)); lua_pop(L, 1); } if(size->size == 1) break; for(i = size->size-2; i >= 0; i--) { if(++counter->data[i] == size->data[i]) { if(i == 0) { is_finished = 1; break; } else { counter->data[i] = 0; lua_pop(L, 1); } } else { lua_pop(L, 1); for(j = i; j < size->size-1; j++) { if(!lua_istable(L, -1)) { THLongStorage_free(size); THLongStorage_free(counter); THTensor_(free)(tensor); THError("invalid tensor definition"); } if(lua_objlen(L, -1) != size->data[j]) { THLongStorage_free(size); THLongStorage_free(counter); THTensor_(free)(tensor); THError("invalid tensor sizes"); } lua_rawgeti(L, -1, counter->data[j]+1); } break; } } } THLongStorage_free(size); THLongStorage_free(counter); } else { THStorage *storage; torch_Tensor_(c_readTensorStorageSizeStride)(L, 1, 1, 1, 1, 1, &storage, &storageOffset, &size, &stride); tensor = THTensor_(newWithStorage)(storage, storageOffset, size, stride); THLongStorage_free(size); THLongStorage_free(stride); } luaT_pushudata(L, tensor, torch_Tensor); return 1; } static int torch_Tensor_(set)(lua_State *L) { THTensor *self = luaT_checkudata(L, 1, torch_Tensor); THStorage *storage; ptrdiff_t storageOffset; THLongStorage *size, *stride; torch_Tensor_(c_readTensorStorageSizeStride)(L, 2, 1, 1, 1, 1, &storage, &storageOffset, &size, &stride); THTensor_(setStorage)(self, storage, storageOffset, size, stride); THLongStorage_free(size); THLongStorage_free(stride); lua_settop(L, 1); return 1; } static int torch_Tensor_(clone)(lua_State *L) { THTensor *self = luaT_checkudata(L, 1, torch_Tensor); self = THTensor_(newClone)(self); luaT_pushudata(L, self, torch_Tensor); return 1; } static int torch_Tensor_(contiguous)(lua_State *L) { THTensor *self = luaT_checkudata(L, 1, torch_Tensor); self = THTensor_(newContiguous)(self); luaT_pushudata(L, self, torch_Tensor); return 1; } /* Resize 
*/ static int torch_Tensor_(resizeAs)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THTensor *src = luaT_checkudata(L, 2, torch_Tensor); THTensor_(resizeAs)(tensor, src); lua_settop(L, 1); return 1; } static int torch_Tensor_(resize)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THLongStorage *size, *stride; torch_Tensor_(c_readSizeStride)(L, 2, 0, &size, &stride); THTensor_(resize)(tensor, size, stride); THLongStorage_free(size); THLongStorage_free(stride); lua_settop(L, 1); return 1; } static int torch_Tensor_(narrow)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); int dimension = luaL_checkint(L, 2)-1; long firstIndex = luaL_checklong(L, 3)-1; long size = luaL_checklong(L, 4); /* THArgCheck( (dimension >= 0) && (dimension < tensor->nDimension), 2, "out of range"); THArgCheck( (firstIndex >= 0) && (firstIndex < tensor->size[dimension]), 3, "out of range"); THArgCheck( (size > 0) && (firstIndex+size <= tensor->size[dimension]), 4, "out of range"); */ tensor = THTensor_(newWithTensor)(tensor); THTensor_(narrow)(tensor, NULL, dimension, firstIndex, size); luaT_pushudata(L, tensor, torch_Tensor); return 1; } static int torch_Tensor_(sub)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); long d0s = -1, d0e = -1, d1s = -1, d1e = -1, d2s = -1, d2e = -1, d3s = -1, d3e = -1; d0s = luaL_checklong(L, 2)-1; d0e = luaL_checklong(L, 3)-1; if(d0s < 0) d0s += tensor->size[0]+1; if(d0e < 0) d0e += tensor->size[0]+1; THArgCheck(tensor->nDimension > 0, 2, "invalid dimension"); THArgCheck(d0s >= 0 && d0s < tensor->size[0], 2, "out of range"); THArgCheck(d0e >= 0 && d0e < tensor->size[0], 3, "out of range"); THArgCheck(d0e >= d0s, 3, "end smaller than beginning"); if(!lua_isnone(L, 4)) { d1s = luaL_checklong(L, 4)-1; d1e = luaL_checklong(L, 5)-1; if(d1s < 0) d1s += tensor->size[1]+1; if(d1e < 0) d1e += tensor->size[1]+1; THArgCheck(tensor->nDimension > 1, 4, "invalid dimension"); 
THArgCheck(d1s >= 0 && d1s < tensor->size[1], 4, "out of range"); THArgCheck(d1e >= 0 && d1e < tensor->size[1], 5, "out of range"); THArgCheck(d1e >= d1s, 5, "end smaller than beginning"); if(!lua_isnone(L, 6)) { d2s = luaL_checklong(L, 6)-1; d2e = luaL_checklong(L, 7)-1; if(d2s < 0) d2s += tensor->size[2]+1; if(d2e < 0) d2e += tensor->size[2]+1; THArgCheck(tensor->nDimension > 2, 6, "invalid dimension"); THArgCheck(d2s >= 0 && d2s < tensor->size[2], 6, "out of range"); THArgCheck(d2e >= 0 && d2e < tensor->size[2], 7, "out of range"); THArgCheck(d2e >= d2s, 7, "end smaller than beginning"); if(!lua_isnone(L, 8)) { d3s = luaL_checklong(L, 8)-1; d3e = luaL_checklong(L, 9)-1; if(d3s < 0) d3s += tensor->size[3]+1; if(d3e < 0) d3e += tensor->size[3]+1; THArgCheck(tensor->nDimension > 3, 8, "invalid dimension"); THArgCheck(d3s >= 0 && d3s < tensor->size[3], 8, "out of range"); THArgCheck(d3e >= 0 && d3e < tensor->size[3], 9, "out of range"); THArgCheck(d3e >= d3s, 9, "end smaller than beginning"); } } } tensor = THTensor_(newWithTensor)(tensor); THTensor_(narrow)(tensor, NULL, 0, d0s, d0e-d0s+1); if(d1s >= 0) THTensor_(narrow)(tensor, NULL, 1, d1s, d1e-d1s+1); if(d2s >= 0) THTensor_(narrow)(tensor, NULL, 2, d2s, d2e-d2s+1); if(d3s >= 0) THTensor_(narrow)(tensor, NULL, 3, d3s, d3e-d3s+1); luaT_pushudata(L, tensor, torch_Tensor); return 1; } static int torch_Tensor_(select)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); int dimension = luaL_checkint(L, 2)-1; long sliceIndex = luaL_checklong(L, 3)-1; /* THArgCheck(src->nDimension > 1, 1, "cannot select on a vector"); THArgCheck((dimension >= 0) && (dimension < src->nDimension), 2, "out of range"); THArgCheck((sliceIndex >= 0) && (sliceIndex < src->size[dimension]), 3, "out of range"); */ if(tensor->nDimension > 1) { tensor = THTensor_(newWithTensor)(tensor); THTensor_(select)(tensor, NULL, dimension, sliceIndex); luaT_pushudata(L, tensor, torch_Tensor); } else { THArgCheck(tensor->nDimension == 1, 
1, "empty Tensor"); luaG_(pushreal)(L, THTensor_(get1d)(tensor, sliceIndex)); } return 1; } #ifndef TH_REAL_IS_HALF static int torch_Tensor_(indexSelect)(lua_State *L) { int narg = lua_gettop(L); THTensor *tensor, *src; THLongTensor *index; int dim; if (narg == 3) { tensor = THTensor_(new)(); src = luaT_checkudata(L, 1, torch_Tensor); dim = luaL_checkint(L, 2) - 1; index = luaT_checkudata(L, 3, "torch.LongTensor"); luaT_pushudata(L,tensor,torch_Tensor); } else if(narg == 4) { src = luaT_checkudata(L, 2, torch_Tensor); dim = luaL_checkint(L, 3) - 1; index = luaT_checkudata(L, 4, "torch.LongTensor"); tensor = luaT_checkudata(L,1,torch_Tensor); } else { THError(torch_Tensor ", number, torch.LongTensor | " torch_Tensor ", " torch_Tensor ", number, torch.LongTensor expected"); return 0; } THTensor_(indexSelect)(tensor,src,dim,index); return 1; } static int torch_Tensor_(indexCopy)(lua_State *L) { int narg = lua_gettop(L); THTensor *tensor, *src; THLongTensor *index; int dim; if(narg == 4) { dim = luaL_checkint(L, 2) - 1; index = luaT_checkudata(L, 3, "torch.LongTensor"); src = luaT_checkudata(L, 4, torch_Tensor); tensor = luaT_checkudata(L,1,torch_Tensor); } else { THError( torch_Tensor ", number, torch.LongTensor, " torch_Tensor " expected"); return 0; } THTensor_(indexCopy)(tensor,dim,index,src); return 1; } static int torch_Tensor_(indexAdd)(lua_State *L) { int narg = lua_gettop(L); THTensor *tensor, *src; THLongTensor *index; int dim; if(narg == 4) { dim = luaL_checkint(L, 2) - 1; index = luaT_checkudata(L, 3, "torch.LongTensor"); src = luaT_checkudata(L, 4, torch_Tensor); tensor = luaT_checkudata(L,1,torch_Tensor); } else { THError( torch_Tensor ", number, torch.LongTensor, " torch_Tensor " expected"); return 0; } THTensor_(indexAdd)(tensor,dim,index,src); return 1; } static int torch_Tensor_(indexFill)(lua_State *L) { int narg = lua_gettop(L); THTensor *tensor; THLongTensor *index; real val; int dim; if(narg == 4) { dim = luaL_checkint(L, 2) - 1; index = 
luaT_checkudata(L, 3, "torch.LongTensor"); val = luaG_(checkreal)(L, 4); tensor = luaT_checkudata(L,1,torch_Tensor); } else { THError( torch_Tensor ", number, torch.LongTensor, number expected"); return 0; } THTensor_(indexFill)(tensor,dim,index,val); return 1; } static int torch_Tensor_(maskedSelect)(lua_State *L) { int narg = lua_gettop(L); THTensor *tensor, *src; THByteTensor *mask; if (narg == 2) { tensor = THTensor_(new)(); src = luaT_checkudata(L, 1, torch_Tensor); mask = luaT_checkudata(L, 2, "torch.ByteTensor"); luaT_pushudata(L,tensor,torch_Tensor); } else if(narg == 3) { src = luaT_checkudata(L, 2, torch_Tensor); mask = luaT_checkudata(L, 3, "torch.ByteTensor"); tensor = luaT_checkudata(L,1,torch_Tensor); } else { THError( torch_Tensor ", torch.ByteTensor | " torch_Tensor ", " torch_Tensor ", torch.ByteTensor expected"); return 0; } THTensor_(maskedSelect)(tensor,src,mask); return 1; } static int torch_Tensor_(maskedCopy)(lua_State *L) { int narg = lua_gettop(L); THTensor *tensor, *src; THByteTensor *mask; if(narg == 3) { mask = luaT_checkudata(L, 2, "torch.ByteTensor"); src = luaT_checkudata(L, 3, torch_Tensor); tensor = luaT_checkudata(L,1,torch_Tensor); } else { THError( torch_Tensor ", torch.ByteTensor, " torch_Tensor " expected"); return 0; } THTensor_(maskedCopy)(tensor,mask,src); /* return destination */ lua_pop(L, 2); return 1; } static int torch_Tensor_(maskedFill)(lua_State *L) { int narg = lua_gettop(L); THTensor *tensor; THByteTensor *mask; real val; if(narg == 3) { mask = luaT_checkudata(L, 2, "torch.ByteTensor"); val = luaG_(checkreal)(L, 3); tensor = luaT_checkudata(L,1,torch_Tensor); } else { THError( torch_Tensor ", torch.ByteTensor, number expected"); return 0; } THTensor_(maskedFill)(tensor,mask,val); return 1; } #endif static int torch_Tensor_(transpose)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); int dimension1 = luaL_checkint(L, 2)-1; int dimension2 = luaL_checkint(L, 3)-1; /* THArgCheck( (dimension1 >= 0) 
&& (dimension1 < src->nDimension), 2, "out of range"); THArgCheck( (dimension2 >= 0) && (dimension2 < src->nDimension), 3, "out of range"); */ tensor = THTensor_(newWithTensor)(tensor); THTensor_(transpose)(tensor, NULL, dimension1, dimension2); luaT_pushudata(L, tensor, torch_Tensor); return 1; } static int torch_Tensor_(t)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THArgCheck(tensor->nDimension == 2, 1, "Tensor must have 2 dimensions"); tensor = THTensor_(newWithTensor)(tensor); THTensor_(transpose)(tensor, NULL, 0, 1); luaT_pushudata(L, tensor, torch_Tensor); return 1; } static int torch_Tensor_(unfold)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); int dimension = luaL_checkint(L, 2)-1; long size = luaL_checklong(L, 3); long step = luaL_checklong(L, 4); /* THArgCheck( (src->nDimension > 0), 1, "cannot unfold an empty tensor"); THArgCheck(dimension < src->nDimension, 2, "out of range"); THArgCheck(size <= src->size[dimension], 3, "out of range"); */ tensor = THTensor_(newWithTensor)(tensor); THTensor_(unfold)(tensor, NULL, dimension, size, step); luaT_pushudata(L, tensor, torch_Tensor); return 1; } /* is contiguous? 
[a bit like in TnXIterator] */ static int torch_Tensor_(isContiguous)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); lua_pushboolean(L, THTensor_(isContiguous)(tensor)); return 1; } static int torch_Tensor_(isSize)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THLongStorage *size = luaT_checkudata(L, 2, "torch.LongStorage"); lua_pushboolean(L, THTensor_(isSize)(tensor, size)); return 1; } static int torch_Tensor_(isSameSizeAs)(lua_State *L) { THTensor *tensor1 = luaT_checkudata(L, 1, torch_Tensor); THTensor *tensor2 = luaT_checkudata(L, 2, torch_Tensor); lua_pushboolean(L, THTensor_(isSameSizeAs)(tensor1, tensor2)); return 1; } static int torch_Tensor_(isSetTo)(lua_State *L) { THTensor *tensor1 = luaT_checkudata(L, 1, torch_Tensor); THTensor *tensor2 = luaT_checkudata(L, 2, torch_Tensor); lua_pushboolean(L, THTensor_(isSetTo)(tensor1, tensor2)); return 1; } static int torch_Tensor_(nElement)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); luaT_pushinteger(L, THTensor_(nElement)(tensor)); return 1; } static int torch_Tensor_(copy)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); void *src; if( (src = luaT_toudata(L, 2, torch_Tensor)) ) THTensor_(copy)(tensor, src); else if( (src = luaT_toudata(L, 2, "torch.ByteTensor")) ) THTensor_(copyByte)(tensor, src); else if( (src = luaT_toudata(L, 2, "torch.CharTensor")) ) THTensor_(copyChar)(tensor, src); else if( (src = luaT_toudata(L, 2, "torch.ShortTensor")) ) THTensor_(copyShort)(tensor, src); else if( (src = luaT_toudata(L, 2, "torch.IntTensor")) ) THTensor_(copyInt)(tensor, src); else if( (src = luaT_toudata(L, 2, "torch.LongTensor")) ) THTensor_(copyLong)(tensor, src); else if( (src = luaT_toudata(L, 2, "torch.FloatTensor")) ) THTensor_(copyFloat)(tensor, src); else if( (src = luaT_toudata(L, 2, "torch.DoubleTensor")) ) THTensor_(copyDouble)(tensor, src); else if( (src = luaT_toudata(L, 2, "torch.HalfTensor")) ) 
THTensor_(copyHalf)(tensor, src); else luaL_typerror(L, 2, "torch.*Tensor"); lua_settop(L, 1); return 1; } static int torch_Tensor_(__newindex__)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THLongStorage *idx = NULL; THByteTensor *mask; if(lua_isnumber(L, 2)) { void *src; long index = luaL_checklong(L,2)-1; THArgCheck(tensor->nDimension > 0, 1, "empty tensor"); if (index < 0) index = tensor->size[0] + index + 1; if (lua_isnumber(L,3)) { real value = luaG_(checkreal)(L,3); if (tensor->nDimension == 1) { THArgCheck(index >= 0 && index < tensor->size[0], 2, "out of range"); THStorage_(set)(tensor->storage, tensor->storageOffset+index*tensor->stride[0], value); } else { #ifndef TH_REAL_IS_HALF tensor = THTensor_(newWithTensor)(tensor); THTensor_(narrow)(tensor, NULL, 0, index, 1); THTensor_(fill)(tensor, value); THTensor_(free)(tensor); #else THError("fill on torch.HalfTensor not yet supported"); #endif } } else if( (src = luaT_toudata(L, 3, torch_Tensor)) ) { tensor = THTensor_(newWithTensor)(tensor); THTensor_(narrow)(tensor, NULL, 0, index, 1); THTensor_(copy)(tensor, src); THTensor_(free)(tensor); } else if( (src = luaT_toudata(L, 3, "torch.ByteTensor")) ) { tensor = THTensor_(newWithTensor)(tensor); THTensor_(narrow)(tensor, NULL, 0, index, 1); THTensor_(copyByte)(tensor, src); THTensor_(free)(tensor); } else if( (src = luaT_toudata(L, 3, "torch.CharTensor")) ) { tensor = THTensor_(newWithTensor)(tensor); THTensor_(narrow)(tensor, NULL, 0, index, 1); THTensor_(copyChar)(tensor, src); THTensor_(free)(tensor); } else if( (src = luaT_toudata(L, 3, "torch.ShortTensor")) ) { tensor = THTensor_(newWithTensor)(tensor); THTensor_(narrow)(tensor, NULL, 0, index, 1); THTensor_(copyShort)(tensor, src); THTensor_(free)(tensor); } else if( (src = luaT_toudata(L, 3, "torch.IntTensor")) ) { tensor = THTensor_(newWithTensor)(tensor); THTensor_(narrow)(tensor, NULL, 0, index, 1); THTensor_(copyInt)(tensor, src); THTensor_(free)(tensor); } else if( (src 
= luaT_toudata(L, 3, "torch.LongTensor")) ) { tensor = THTensor_(newWithTensor)(tensor); THTensor_(narrow)(tensor, NULL, 0, index, 1); THTensor_(copyLong)(tensor, src); THTensor_(free)(tensor); } else if( (src = luaT_toudata(L, 3, "torch.FloatTensor")) ) { tensor = THTensor_(newWithTensor)(tensor); THTensor_(narrow)(tensor, NULL, 0, index, 1); THTensor_(copyFloat)(tensor, src); THTensor_(free)(tensor); } else if( (src = luaT_toudata(L, 3, "torch.DoubleTensor")) ) { tensor = THTensor_(newWithTensor)(tensor); THTensor_(narrow)(tensor, NULL, 0, index, 1); THTensor_(copyDouble)(tensor, src); THTensor_(free)(tensor); } else if( (src = luaT_toudata(L, 3, "torch.HalfTensor")) ) { tensor = THTensor_(newWithTensor)(tensor); THTensor_(narrow)(tensor, NULL, 0, index, 1); THTensor_(copyHalf)(tensor, src); THTensor_(free)(tensor); } else { luaL_typerror(L, 3, "torch.*Tensor"); } lua_pushboolean(L, 1); } else if((idx = luaT_toudata(L, 2, "torch.LongStorage"))) { ptrdiff_t index = THTensor_(storageOffset)(tensor); real value = luaG_(checkreal)(L,3); int dim; THArgCheck(idx->size == tensor->nDimension, 2, "invalid size"); for(dim = 0; dim < idx->size; dim++) { long z = idx->data[dim]-1; if (z < 0) z = tensor->size[dim] + z + 1; THArgCheck((z >= 0) && (z < tensor->size[dim]), 2, "index out of bound"); index += z*tensor->stride[dim]; } THStorage_(set)(tensor->storage, index, value); lua_pushboolean(L, 1); } else if(lua_istable(L, 2)) { int dim; int cdim = 0; int ndims; int done = 0; ndims = tensor->nDimension; THArgCheck(lua_objlen(L, 2) <= ndims, 2, "too many indices provided"); tensor = THTensor_(newWithTensor)(tensor); for(dim = 0; dim < ndims; dim++) { lua_rawgeti(L, 2, dim+1); if(lua_isnumber(L, -1)) { long z = lua_tonumber(L, -1)-1; lua_pop(L, 1); if (z < 0) z = tensor->size[cdim] + z + 1; THArgCheck((z >= 0) && (z < tensor->size[cdim]), 2, "index out of bound"); if(tensor->nDimension == 1) { real value = luaG_(checkreal)(L,3); done = 1; THStorage_(set)(tensor->storage, 
tensor->storageOffset+z*tensor->stride[0], value); } else { THTensor_(select)(tensor, NULL, cdim, z); } } else if (lua_istable(L, -1)) { long start = 0; long end = tensor->size[cdim]-1; lua_rawgeti(L, -1, 1); if(lua_isnumber(L, -1)) { start = lua_tonumber(L, -1)-1; end = start; } lua_pop(L, 1); if (start < 0) start = tensor->size[cdim] + start + 1; THArgCheck((start >= 0) && (start < tensor->size[cdim]), 2, "start index out of bound"); lua_rawgeti(L, -1, 2); if(lua_isnumber(L, -1)) { end = lua_tonumber(L, -1)-1; } lua_pop(L, 2); if (end < 0) end = tensor->size[cdim] + end + 1; THArgCheck((end >= 0) && (end < tensor->size[cdim]), 2, "end index out of bound"); THArgCheck((end >= start), 2, "end index must be greater or equal to start index"); THTensor_(narrow)(tensor, NULL, cdim++, start, end-start+1); } else { break; } } if(!done) { /* doing a copy */ void *src; if (lua_isnumber(L,3)) { #ifndef TH_REAL_IS_HALF THTensor_(fill)(tensor, LUA_NUMBER_TO_REAL(lua_tonumber(L,3))); #else THError("fill on torch.HalfTensor not yet supported"); #endif } else if( (src = luaT_toudata(L, 3, torch_Tensor)) ) { THTensor_(copy)(tensor, src); } else if( (src = luaT_toudata(L, 3, "torch.ByteTensor")) ) { THTensor_(copyByte)(tensor, src); } else if( (src = luaT_toudata(L, 3, "torch.CharTensor")) ) { THTensor_(copyChar)(tensor, src); } else if( (src = luaT_toudata(L, 3, "torch.ShortTensor")) ) { THTensor_(copyShort)(tensor, src); } else if( (src = luaT_toudata(L, 3, "torch.IntTensor")) ) { THTensor_(copyInt)(tensor, src); } else if( (src = luaT_toudata(L, 3, "torch.LongTensor")) ) { THTensor_(copyLong)(tensor, src); } else if( (src = luaT_toudata(L, 3, "torch.FloatTensor")) ) { THTensor_(copyFloat)(tensor, src); } else if( (src = luaT_toudata(L, 3, "torch.DoubleTensor")) ) { THTensor_(copyDouble)(tensor, src); } else if( (src = luaT_toudata(L, 3, "torch.HalfTensor")) ) { THTensor_(copyHalf)(tensor, src); } else { luaL_typerror(L, 3, "torch.*Tensor"); } } THTensor_(free)(tensor); 
lua_pushboolean(L, 1); } else if((mask = luaT_toudata(L, 2, "torch.ByteTensor"))) { #ifndef TH_REAL_IS_HALF THTensor *vals; if (lua_isnumber(L, 3)) { THTensor_(maskedFill)(tensor, mask, luaG_(checkreal)(L,3)); } else if((vals = luaT_toudata(L, 3, torch_Tensor))) { THTensor_(maskedCopy)(tensor, mask, vals); } else { THError("number or " torch_Tensor " expected"); } #else THError("ByteTensor indexing not yet supported with half types"); #endif } else lua_pushboolean(L, 0); return 1; } static int torch_Tensor_(__index__)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THLongStorage *idx = NULL; THByteTensor *mask; if(lua_isnumber(L, 2)) { long index = luaL_checklong(L,2)-1; THArgCheck(tensor->nDimension > 0, 1, "empty tensor"); if (index < 0) index = tensor->size[0] + index + 1; THArgCheck(index >= 0 && index < tensor->size[0], 2, "out of range"); if(tensor->nDimension == 1) { luaG_(pushreal)(L, THStorage_(get)(tensor->storage, tensor->storageOffset+index*tensor->stride[0])); } else { tensor = THTensor_(newWithTensor)(tensor); THTensor_(select)(tensor, NULL, 0, index); luaT_pushudata(L, tensor, torch_Tensor); } lua_pushboolean(L, 1); return 2; } else if((idx = luaT_toudata(L, 2, "torch.LongStorage"))) { ptrdiff_t index = THTensor_(storageOffset)(tensor); int dim; THArgCheck(idx->size == tensor->nDimension, 2, "invalid size"); for(dim = 0; dim < idx->size; dim++) { long z = idx->data[dim]-1; if (z < 0) z = tensor->size[dim] + z + 1; THArgCheck((z >= 0) && (z < tensor->size[dim]), 2, "index out of bound"); index += z*tensor->stride[dim]; } luaG_(pushreal)(L, THStorage_(get)(THTensor_(storage)(tensor), index)); lua_pushboolean(L, 1); return 2; } else if(lua_istable(L, 2)) { int dim; int cdim = 0; int ndims; int done = 0; ndims = tensor->nDimension; THArgCheck(lua_objlen(L, 2) <= ndims, 2, "too many indices provided"); tensor = THTensor_(newWithTensor)(tensor); for(dim = 0; dim < ndims; dim++) { lua_rawgeti(L, 2, dim+1); if(lua_isnumber(L, -1)) { 
long z = lua_tonumber(L, -1)-1; lua_pop(L, 1); if (z < 0) z = tensor->size[cdim] + z + 1; THArgCheck((z >= 0) && (z < tensor->size[cdim]), 2, "index out of bound"); if(tensor->nDimension == 1) { done = 1; luaG_(pushreal)(L, THStorage_(get)(tensor->storage, tensor->storageOffset+z*tensor->stride[0])); } else { THTensor_(select)(tensor, NULL, cdim, z); } } else if (lua_istable(L, -1)) { long start = 0; long end = tensor->size[cdim]-1; lua_rawgeti(L, -1, 1); if(lua_isnumber(L, -1)) { start = lua_tonumber(L, -1)-1; end = start; } lua_pop(L, 1); if (start < 0) start = tensor->size[cdim] + start + 1; THArgCheck((start >= 0) && (start < tensor->size[cdim]), 2, "start index out of bound"); lua_rawgeti(L, -1, 2); if(lua_isnumber(L, -1)) { end = lua_tonumber(L, -1)-1; } lua_pop(L, 2); if (end < 0) end = tensor->size[cdim] + end + 1; THArgCheck((end >= 0) && (end < tensor->size[cdim]), 2, "end index out of bound"); THArgCheck((end >= start), 2, "end index must be greater or equal to start index"); THTensor_(narrow)(tensor, NULL, cdim++, start, end-start+1); } else { break; } } if(!done) { luaT_pushudata(L, tensor, torch_Tensor); } else { THTensor_(free)(tensor); } lua_pushboolean(L, 1); return 2; } else if((mask = luaT_toudata(L, 2, "torch.ByteTensor"))) { #ifndef TH_REAL_IS_HALF THTensor *vals = THTensor_(new)(); THTensor_(maskedSelect)(vals, tensor, mask); luaT_pushudata(L, vals, torch_Tensor); lua_pushboolean(L, 1); return 2; #else THError("ByteTensor based indexing not yetsupported with half type"); return 0; #endif } else { lua_pushboolean(L, 0); return 1; } } static int torch_Tensor_(retain)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THTensor_(retain)(tensor); return 0; } static int torch_Tensor_(free)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THTensor_(free)(tensor); return 0; } /* helpful functions */ static void torch_Tensor_(c_readSizeStride)(lua_State *L, int index, int allowStride, THLongStorage **size_, 
THLongStorage **stride_) { THLongStorage *size = NULL; THLongStorage *stride = NULL; if( (size = luaT_toudata(L, index, "torch.LongStorage")) ) { if(!lua_isnoneornil(L, index+1)) { if( (stride = luaT_toudata(L, index+1, "torch.LongStorage")) ) THArgCheck(stride->size == size->size, index+1, "provided stride and size are inconsistent"); else THArgCheck(0, index+1, "torch.LongStorage expected"); } THLongStorage_retain(size); if(stride) THLongStorage_retain(stride); } else { int i; size = THLongStorage_newWithSize(8); stride = THLongStorage_newWithSize(8); THLongStorage_fill(size, -1); THLongStorage_fill(stride, -1); if(allowStride) { for(i = 0; i < 8; i++) { if(lua_isnone(L, index+2*i)) break; size->data[i] = luaL_checklong(L, index+2*i); if(lua_isnone(L, index+2*i+1)) break; stride->data[i] = luaL_checklong(L, index+2*i+1); } } else { for(i = 0; i < 8; i++) { if(lua_isnone(L, index+i)) break; size->data[i] = luaL_checklong(L, index+i); } } } *size_ = size; *stride_ = stride; } static void torch_Tensor_(c_readTensorStorageSizeStride)(lua_State *L, int index, int allowNone, int allowTensor, int allowStorage, int allowStride, THStorage **storage_, ptrdiff_t *storageOffset_, THLongStorage **size_, THLongStorage **stride_) { THTensor *src = NULL; THStorage *storage = NULL; int arg1Type = lua_type(L, index); if( allowNone && (arg1Type == LUA_TNONE) ) { *storage_ = NULL; *storageOffset_ = 0; *size_ = NULL; *stride_ = NULL; return; } else if( allowTensor && (arg1Type == LUA_TUSERDATA) && (src = luaT_toudata(L, index, torch_Tensor)) ) { *storage_ = src->storage; *storageOffset_ = src->storageOffset; *size_ = THTensor_(newSizeOf)(src); *stride_ = THTensor_(newStrideOf)(src); return; } else if( allowStorage && (arg1Type == LUA_TUSERDATA) && (storage = luaT_toudata(L, index, torch_Storage)) ) { *storage_ = storage; if(lua_isnone(L, index+1)) { *storageOffset_ = 0; *size_ = THLongStorage_newWithSize1(storage->size); *stride_ = THLongStorage_newWithSize1(1); } else { 
*storageOffset_ = luaL_checkinteger(L, index+1)-1; torch_Tensor_(c_readSizeStride)(L, index+2, allowStride, size_, stride_); } return; } else if( (arg1Type == LUA_TNUMBER) || (luaT_toudata(L, index, "torch.LongStorage")) ) { *storage_ = NULL; *storageOffset_ = 0; torch_Tensor_(c_readSizeStride)(L, index, 0, size_, stride_); return; } *storage_ = NULL; *storageOffset_ = 0; if(allowTensor && allowStorage) THArgCheck(0, index, "expecting number or " torch_Tensor " or " torch_Storage ); else if(allowTensor) THArgCheck(0, index, "expecting number or " torch_Tensor ); else if(allowStorage) THArgCheck(0, index, "expecting number or " torch_Storage ); else THArgCheck(0, index, "expecting number"); } #ifndef TH_REAL_IS_HALF static int torch_Tensor_(apply)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); luaL_checktype(L, 2, LUA_TFUNCTION); lua_settop(L, 2); TH_TENSOR_APPLY(real, tensor, lua_pushvalue(L, 2); luaG_(pushreal)(L, *tensor_data); lua_call(L, 1, 1); if(lua_isnumber(L, 3)) { *tensor_data = luaG_(checkreal)(L, 3); lua_pop(L, 1); } else if(lua_isnil(L, 3)) lua_pop(L, 1); else THError("given function should return a number or nil");); lua_settop(L, 1); return 1; } static int torch_Tensor_(map)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THTensor *src = luaT_checkudata(L, 2, torch_Tensor); luaL_checktype(L, 3, LUA_TFUNCTION); lua_settop(L, 3); TH_TENSOR_APPLY2(real, tensor, real, src, lua_pushvalue(L, 3); luaG_(pushreal)(L, *tensor_data); luaG_(pushreal)(L, *src_data); lua_call(L, 2, 1); if(lua_isnumber(L, 4)) { *tensor_data = luaG_(checkreal)(L, 4); lua_pop(L, 1); } else if(lua_isnil(L, 4)) lua_pop(L, 1); else THError("given function should return a number or nil");); lua_settop(L, 1); return 1; } static int torch_Tensor_(map2)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THTensor *src1 = luaT_checkudata(L, 2, torch_Tensor); THTensor *src2 = luaT_checkudata(L, 3, torch_Tensor); 
luaL_checktype(L, 4, LUA_TFUNCTION); lua_settop(L, 4); TH_TENSOR_APPLY3(real, tensor, real, src1, real, src2, lua_pushvalue(L, 4); luaG_(pushreal)(L, *tensor_data); luaG_(pushreal)(L, *src1_data); luaG_(pushreal)(L, *src2_data); lua_call(L, 3, 1); if(lua_isnumber(L, 5)) { *tensor_data = luaG_(checkreal)(L, 5); lua_pop(L, 1); } else if(lua_isnil(L, 5)) lua_pop(L, 1); else THError("given function should return a number or nil");); lua_settop(L, 1); return 1; } #endif static int torch_Tensor_(factory)(lua_State *L) { THTensor *tensor = THTensor_(new)(); luaT_pushudata(L, tensor, torch_Tensor); return 1; } static int torch_Tensor_(write)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THFile *file = luaT_checkudata(L, 2, "torch.File"); THFile_writeIntScalar(file, tensor->nDimension); THFile_writeLongRaw(file, tensor->size, tensor->nDimension); THFile_writeLongRaw(file, tensor->stride, tensor->nDimension); THFile_writeLongScalar(file, tensor->storageOffset+1); /* to respect Lua convention */ lua_getfield(L, 2, "writeObject"); /* the method */ lua_pushvalue(L, 2); /* the file */ /* the storage */ if(tensor->storage) { THStorage_(retain)(tensor->storage); luaT_pushudata(L, tensor->storage, torch_Storage); } else lua_pushnil(L); lua_call(L, 2, 0); /* call the method */ return 0; } static int torch_Tensor_(read)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THFile *file = luaT_checkudata(L, 2, "torch.File"); tensor->nDimension = THFile_readIntScalar(file); tensor->size = THAlloc(sizeof(long)*tensor->nDimension); tensor->stride = THAlloc(sizeof(long)*tensor->nDimension); THFile_readLongRaw(file, tensor->size, tensor->nDimension); THFile_readLongRaw(file, tensor->stride, tensor->nDimension); tensor->storageOffset = THFile_readLongScalar(file); tensor->storageOffset--; /* to respect Lua convention */ lua_getfield(L, 2, "readObject"); /* the method */ lua_pushvalue(L, 2); /* the file */ lua_call(L, 1, 1); /* call the method */ 
tensor->storage = luaT_toudata(L, -1, torch_Storage); if(tensor->storage) THStorage_(retain)(tensor->storage); return 0; } static const struct luaL_Reg torch_Tensor_(_) [] = { {"retain", torch_Tensor_(retain)}, {"free", torch_Tensor_(free)}, {"contiguous", torch_Tensor_(contiguous)}, {"size", torch_Tensor_(size)}, {"elementSize", torch_Tensor_(elementSize)}, {"__len__", torch_Tensor_(size)}, {"stride", torch_Tensor_(stride)}, {"dim", torch_Tensor_(nDimension)}, {"nDimension", torch_Tensor_(nDimension)}, {"set", torch_Tensor_(set)}, {"storage", torch_Tensor_(storage)}, {"storageOffset", torch_Tensor_(storageOffset)}, {"clone", torch_Tensor_(clone)}, {"contiguous", torch_Tensor_(contiguous)}, {"resizeAs", torch_Tensor_(resizeAs)}, {"resize", torch_Tensor_(resize)}, {"narrow", torch_Tensor_(narrow)}, {"sub", torch_Tensor_(sub)}, {"select", torch_Tensor_(select)}, #ifndef TH_REAL_IS_HALF {"index", torch_Tensor_(indexSelect)}, {"indexCopy", torch_Tensor_(indexCopy)}, {"indexAdd", torch_Tensor_(indexAdd)}, {"indexFill", torch_Tensor_(indexFill)}, {"maskedSelect", torch_Tensor_(maskedSelect)}, {"maskedCopy", torch_Tensor_(maskedCopy)}, {"maskedFill", torch_Tensor_(maskedFill)}, #endif {"transpose", torch_Tensor_(transpose)}, {"t", torch_Tensor_(t)}, {"unfold", torch_Tensor_(unfold)}, {"isContiguous", torch_Tensor_(isContiguous)}, {"isSameSizeAs", torch_Tensor_(isSameSizeAs)}, {"isSetTo", torch_Tensor_(isSetTo)}, {"isSize", torch_Tensor_(isSize)}, {"nElement", torch_Tensor_(nElement)}, {"copy", torch_Tensor_(copy)}, #ifndef TH_REAL_IS_HALF {"apply", torch_Tensor_(apply)}, {"map", torch_Tensor_(map)}, {"map2", torch_Tensor_(map2)}, #endif {"read", torch_Tensor_(read)}, {"write", torch_Tensor_(write)}, {"__index__", torch_Tensor_(__index__)}, {"__newindex__", torch_Tensor_(__newindex__)}, {NULL, NULL} }; void torch_Tensor_(init)(lua_State *L) { luaT_newmetatable(L, torch_Tensor, NULL, torch_Tensor_(new), torch_Tensor_(free), torch_Tensor_(factory)); luaT_setfuncs(L, 
torch_Tensor_(_), 0); lua_pop(L, 1); #ifndef TH_REAL_IS_HALF THVector_(vectorDispatchInit)(); #endif } #endif generic/TensorOperator.c000066400000000000000000000115241316246254300156350ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/TensorOperator.c" #else #include "luaG.h" static int torch_TensorOperator_(__add__)(lua_State *L) { THTensor *tensor1 = luaT_toudata(L, 1, torch_Tensor); THTensor *tensor2 = luaT_toudata(L, 2, torch_Tensor); THTensor *r; if(!tensor1 && !tensor2) luaL_error(L, "expecting two " torch_Tensor "s or one " torch_Tensor " and one number"); else { r = THTensor_(new)(); luaT_pushudata(L, r, torch_Tensor); if(!tensor1 && tensor2) { THTensor_(resizeAs)(r, tensor2); THTensor_(copy)(r, tensor2); THTensor_(add)(r, r, luaG_(checkreal)(L, 1)); } else if(tensor1 && !tensor2) { THTensor_(resizeAs)(r, tensor1); THTensor_(copy)(r, tensor1); THTensor_(add)(r, r, luaG_(checkreal)(L, 2)); } else { THTensor_(resizeAs)(r, tensor1); THTensor_(copy)(r, tensor1); THTensor_(cadd)(r, r, 1, tensor2); } } return 1; } static int torch_TensorOperator_(__sub__)(lua_State *L) { THTensor *tensor1 = luaT_toudata(L, 1, torch_Tensor); THTensor *tensor2 = luaT_toudata(L, 2, torch_Tensor); THTensor *r; if(!tensor1 && !tensor2) luaL_error(L, "expecting two " torch_Tensor "s or one " torch_Tensor " and one number"); else { r = THTensor_(new)(); luaT_pushudata(L, r, torch_Tensor); if(!tensor1 && tensor2) { THTensor_(resizeAs)(r, tensor2); THTensor_(fill)(r, luaG_(checkreal)(L, 1)); THTensor_(cadd)(r, r, -1, tensor2); } else if(tensor1 && !tensor2) { THTensor_(resizeAs)(r, tensor1); THTensor_(copy)(r, tensor1); THTensor_(add)(r, r, -luaG_(checkreal)(L, 2)); } else { THTensor_(resizeAs)(r, tensor1); THTensor_(copy)(r, tensor1); THTensor_(cadd)(r, r, -1, tensor2); } } return 1; } static int torch_TensorOperator_(__unm__)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THTensor *r; r = THTensor_(new)(); luaT_pushudata(L, r, 
torch_Tensor); THTensor_(resizeAs)(r, tensor); THTensor_(copy)(r, tensor); THTensor_(mul)(r, r, -1); return 1; } static int torch_TensorOperator_(__mul__)(lua_State *L) { THTensor *tensor1 = luaT_toudata(L, 1, torch_Tensor); THTensor *tensor2 = luaT_toudata(L, 2, torch_Tensor); THTensor *r; if(!tensor1 && !tensor2) luaL_error(L, "expecting two " torch_Tensor "s or one " torch_Tensor " and one number"); else { r = THTensor_(new)(); luaT_pushudata(L, r, torch_Tensor); if(!tensor1 && tensor2) { THTensor_(resizeAs)(r, tensor2); THTensor_(copy)(r, tensor2); THTensor_(mul)(r, r, luaG_(checkreal)(L, 1)); } else if(tensor1 && !tensor2) { THTensor_(resizeAs)(r, tensor1); THTensor_(copy)(r, tensor1); THTensor_(mul)(r, r, luaG_(checkreal)(L, 2)); } else { int dimt = tensor1->nDimension; int dims = tensor2->nDimension; if(dimt == 1 && dims == 1) luaG_(pushreal)(L, THTensor_(dot)(tensor1, tensor2)); /* ok, we wasted r, but who cares */ else if(dimt == 2 && dims == 1) { THTensor_(resize1d)(r, tensor1->size[0]); THTensor_(zero)(r); THTensor_(addmv)(r, 1, r, 1, tensor1, tensor2); } else if(dimt == 2 && dims == 2) { THTensor_(resize2d)(r, tensor1->size[0], tensor2->size[1]); THTensor_(zero)(r); THTensor_(addmm)(r, 1, r, 1, tensor1, tensor2); } else luaL_error(L, "multiplication between %dD and %dD tensors not yet supported", tensor1->nDimension, tensor2->nDimension); } } return 1; } static int torch_TensorOperator_(__div__)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THTensor *r; THArgCheck(lua_isnumber(L,2), 2, "number expected"); r = THTensor_(new)(); luaT_pushudata(L, r, torch_Tensor); THTensor_(resizeAs)(r, tensor); THTensor_(copy)(r, tensor); THTensor_(div)(r, r, lua_tonumber(L, 2)); return 1; } static int torch_TensorOperator_(__mod__)(lua_State *L) { THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THTensor *r; THArgCheck(lua_isnumber(L,2), 2, "number expected"); r = THTensor_(new)(); luaT_pushudata(L, r, torch_Tensor); 
THTensor_(resizeAs)(r, tensor); THTensor_(copy)(r, tensor); THTensor_(remainder)(r, r, lua_tonumber(L, 2)); return 1; } static const struct luaL_Reg torch_TensorOperator_(_) [] = { {"__add__", torch_TensorOperator_(__add__)}, {"__sub__", torch_TensorOperator_(__sub__)}, {"__unm__", torch_TensorOperator_(__unm__)}, {"__mul__", torch_TensorOperator_(__mul__)}, {"__div__", torch_TensorOperator_(__div__)}, {"__mod__", torch_TensorOperator_(__mod__)}, {NULL, NULL} }; void torch_TensorOperator_(init)(lua_State *L) { luaT_pushmetatable(L, torch_Tensor); luaT_setfuncs(L, torch_TensorOperator_(_), 0); lua_pop(L, 1); } #endif generic/luaG.h000066400000000000000000000043311316246254300135420ustar00rootroot00000000000000#if !defined(real) || !defined(TH_GENERIC_FILE) #error "luaG.h must not be included outside of a generic file." #endif #ifndef luaG_ #define luaG_(NAME) TH_CONCAT_3(luaG_,Real,NAME) #endif #undef REAL_TO_LUA_NUMBER #undef LUA_NUMBER_TO_REAL #if defined(TH_REAL_IS_HALF) # define REAL_TO_LUA_NUMBER(n) (lua_Number)TH_half2float(n) # define LUA_NUMBER_TO_REAL(n) TH_float2half((lua_Number)n) #else # define REAL_TO_LUA_NUMBER(n) (lua_Number)(n) # define LUA_NUMBER_TO_REAL(n) (real)n #endif static void luaG_(pushreal)(lua_State *L, real n) { #if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF) || LUA_VERSION_NUM < 503 lua_pushnumber(L, REAL_TO_LUA_NUMBER(n)); #elif defined(TH_REAL_IS_BYTE) || defined(TH_REAL_IS_CHAR) || defined(TH_REAL_IS_SHORT) \ || defined(TH_REAL_IS_INT) || defined(TH_REAL_IS_LONG) lua_pushinteger(L, (lua_Integer)n); #else #error "unhandled real type in luaG_pushreal" #endif } static real luaG_(checkreal)(lua_State *L, int idx) { #if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF) return LUA_NUMBER_TO_REAL(luaL_checknumber(L, idx)); #elif defined(TH_REAL_IS_BYTE) || defined(TH_REAL_IS_CHAR) || defined(TH_REAL_IS_SHORT) || defined(TH_REAL_IS_INT) || defined(TH_REAL_IS_LONG) int 
type = lua_type(L, idx); if (type == LUA_TSTRING) { const char *str = lua_tolstring(L, idx, NULL); long int num = strtol(str, NULL, 0); return (real) num; } else { #if LUA_VERSION_NUM < 503 return (lua_Number)luaL_checkinteger(L, idx); #else return (lua_Integer)luaL_checkinteger(L, idx); #endif } #else #error "unhandled real type in luaG_checkreal" #endif } static real luaG_(optreal)(lua_State *L, int idx, real n) { #if defined(TH_REAL_IS_HALF) || defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || LUA_VERSION_NUM < 503 return LUA_NUMBER_TO_REAL(luaL_optnumber(L, idx, REAL_TO_LUA_NUMBER(n))); #elif defined(TH_REAL_IS_BYTE) || defined(TH_REAL_IS_CHAR) || defined(TH_REAL_IS_SHORT) || defined(TH_REAL_IS_INT) || defined(TH_REAL_IS_LONG) return (lua_Integer)luaL_optinteger(L, idx, (lua_Integer)n); #else #error "unhandled real type in luaG_checkreal" #endif } init.c000066400000000000000000000054151316246254300122000ustar00rootroot00000000000000#include "general.h" #include "utils.h" extern void torch_utils_init(lua_State *L); extern void torch_random_init(lua_State *L); extern void torch_File_init(lua_State *L); extern void torch_DiskFile_init(lua_State *L); extern void torch_MemoryFile_init(lua_State *L); extern void torch_PipeFile_init(lua_State *L); extern void torch_Timer_init(lua_State *L); extern void torch_ByteStorage_init(lua_State *L); extern void torch_CharStorage_init(lua_State *L); extern void torch_ShortStorage_init(lua_State *L); extern void torch_IntStorage_init(lua_State *L); extern void torch_LongStorage_init(lua_State *L); extern void torch_FloatStorage_init(lua_State *L); extern void torch_DoubleStorage_init(lua_State *L); extern void torch_HalfStorage_init(lua_State *L); extern void torch_ByteTensor_init(lua_State *L); extern void torch_CharTensor_init(lua_State *L); extern void torch_ShortTensor_init(lua_State *L); extern void torch_IntTensor_init(lua_State *L); extern void torch_LongTensor_init(lua_State *L); extern void 
torch_FloatTensor_init(lua_State *L); extern void torch_DoubleTensor_init(lua_State *L); extern void torch_HalfTensor_init(lua_State *L); extern void torch_ByteTensorOperator_init(lua_State *L); extern void torch_CharTensorOperator_init(lua_State *L); extern void torch_ShortTensorOperator_init(lua_State *L); extern void torch_IntTensorOperator_init(lua_State *L); extern void torch_LongTensorOperator_init(lua_State *L); extern void torch_FloatTensorOperator_init(lua_State *L); extern void torch_DoubleTensorOperator_init(lua_State *L); extern void torch_TensorMath_init(lua_State *L); LUA_EXTERNC DLL_EXPORT int luaopen_libtorch(lua_State *L); int luaopen_libtorch(lua_State *L) { lua_newtable(L); lua_pushvalue(L, -1); lua_setglobal(L, "torch"); torch_utils_init(L); torch_File_init(L); torch_ByteStorage_init(L); torch_CharStorage_init(L); torch_ShortStorage_init(L); torch_IntStorage_init(L); torch_LongStorage_init(L); torch_FloatStorage_init(L); torch_DoubleStorage_init(L); torch_HalfStorage_init(L); torch_ByteTensor_init(L); torch_CharTensor_init(L); torch_ShortTensor_init(L); torch_IntTensor_init(L); torch_LongTensor_init(L); torch_FloatTensor_init(L); torch_DoubleTensor_init(L); torch_HalfTensor_init(L); torch_ByteTensorOperator_init(L); torch_CharTensorOperator_init(L); torch_ShortTensorOperator_init(L); torch_IntTensorOperator_init(L); torch_LongTensorOperator_init(L); torch_FloatTensorOperator_init(L); torch_DoubleTensorOperator_init(L); torch_Timer_init(L); torch_DiskFile_init(L); torch_PipeFile_init(L); torch_MemoryFile_init(L); torch_TensorMath_init(L); torch_random_init(L); // Create 'torch.Allocator' type. 
luaT_newmetatable(L, "torch.Allocator", NULL, NULL, NULL, NULL); return 1; } init.lua000066400000000000000000000130701316246254300125330ustar00rootroot00000000000000-- We are using paths.require to appease mkl -- Make this work with LuaJIT in Lua 5.2 compatibility mode, which -- renames string.gfind (already deprecated in 5.1) if not string.gfind then string.gfind = string.gmatch end if not table.unpack then table.unpack = unpack end require "paths" paths.require "libtorch" -- Keep track of all thread local variables torch. -- if a Lua VM is passed to another thread thread local -- variables need to be updated. function torch.updatethreadlocals() torch.updateerrorhandlers() local tracking = torch._heaptracking if tracking == nil then tracking = false end torch.setheaptracking(tracking) end --- package stuff function torch.packageLuaPath(name) if not name then local ret = string.match(torch.packageLuaPath('torch'), '(.*)/') if not ret then --windows? ret = string.match(torch.packageLuaPath('torch'), '(.*)\\') end return ret end for path in string.gmatch(package.path, "[^;]+") do path = string.gsub(path, "%?", name) local f = io.open(path) if f then f:close() local ret = string.match(path, "(.*)/") if not ret then --windows? ret = string.match(path, "(.*)\\") end return ret end end end local function include(file, depth) paths.dofile(file, 3 + (depth or 0)) end rawset(_G, 'include', include) function torch.include(package, file) dofile(torch.packageLuaPath(package) .. '/' .. file) end function torch.class(...) local tname, parenttname, module if select('#', ...) == 3 and type(select(1, ...)) == 'string' and type(select(2, ...)) == 'string' and type(select(3, ...)) == 'table' then tname = select(1, ...) parenttname = select(2, ...) module = select(3, ...) elseif select('#', ...) == 2 and type(select(1, ...)) == 'string' and type(select(2, ...)) == 'string' then tname = select(1, ...) parenttname = select(2, ...) elseif select('#', ...) 
== 2 and type(select(1, ...)) == 'string' and type(select(2, ...)) == 'table' then tname = select(1, ...) module = select(2, ...) elseif select('#', ...) == 1 and type(select(1, ...)) == 'string' then tname = select(1, ...) else error(' [] [] expected') end local function constructor(...) local self = {} torch.setmetatable(self, tname) if self.__init then self:__init(...) end return self end local function factory() local self = {} torch.setmetatable(self, tname) return self end local mt = torch.newmetatable(tname, parenttname, constructor, nil, factory, module) local mpt if parenttname then mpt = torch.getmetatable(parenttname) end return mt, mpt end function torch.setdefaulttensortype(typename) assert(type(typename) == 'string', 'string expected') if torch.getconstructortable(typename) then torch.Tensor = torch.getconstructortable(typename) torch.Storage = torch.getconstructortable(torch.typename(torch.Tensor(1):storage())) else error(string.format("<%s> is not a string describing a torch object", typename)) end end function torch.type(obj) local class = torch.typename(obj) if not class then class = type(obj) end return class end --[[ See if a given object is an instance of the provided torch class. ]] function torch.isTypeOf(obj, typeSpec) -- typeSpec can be provided as either a string, pattern, or the constructor. -- If the constructor is used, we look in the __typename field of the -- metatable to find a string to compare to. 
if type(typeSpec) ~= 'string' then typeSpec = getmetatable(typeSpec).__typename assert(type(typeSpec) == 'string', "type must be provided as [regexp] string, or factory") end local mt = getmetatable(obj) while mt do if type(mt) == 'table' and mt.__typename then local match = mt.__typename:match(typeSpec) -- Require full match for non-pattern specs if match and (match ~= typeSpec or match == mt.__typename) then return true end end mt = getmetatable(mt) end return false end torch.setdefaulttensortype('torch.DoubleTensor') require('torch.Tensor') require('torch.File') require('torch.CmdLine') require('torch.FFInterface') require('torch.Tester') require('torch.TestSuite') require('torch.test') function torch.totable(obj) if torch.isTensor(obj) or torch.isStorage(obj) then return obj:totable() else error("obj must be a Storage or a Tensor") end end function torch.isTensor(obj) local typename = torch.typename(obj) if typename and typename:find('torch.*Tensor') then return true end return false end function torch.isStorage(obj) local typename = torch.typename(obj) if typename and typename:find('torch.*Storage') then return true end return false end -- alias for convenience torch.Tensor.isTensor = torch.isTensor -- remove this line to disable automatic heap-tracking for garbage collection torch.setheaptracking(true) function torch.multinomialAliasSetup(probs, state) if torch.type(state) == 'table' then state[1], state[2] = torch.multinomialAliasSetup_(probs, state[1], state[2]) else state = {} state[1], state[2] = torch.multinomialAliasSetup_(probs) end return state end function torch.multinomialAlias(output, state) torch.DoubleTensor.multinomialAlias_(output, state[1], state[2]) return output end return torch lib/000077500000000000000000000000001316246254300116325ustar00rootroot00000000000000lib/CMakeLists.txt000066400000000000000000000004351316246254300143740ustar00rootroot00000000000000SET(TH_INSTALL_BIN_SUBDIR "${Torch_INSTALL_BIN_SUBDIR}") SET(TH_INSTALL_LIB_SUBDIR 
"${Torch_INSTALL_LIB_SUBDIR}") SET(TH_INSTALL_INCLUDE_SUBDIR "${Torch_INSTALL_INCLUDE_SUBDIR}") SET(TH_INSTALL_CMAKE_SUBDIR "${Torch_INSTALL_CMAKE_SUBDIR}") ADD_SUBDIRECTORY(TH) ADD_SUBDIRECTORY(luaT) lib/TH/000077500000000000000000000000001316246254300121455ustar00rootroot00000000000000lib/TH/CMakeLists.txt000066400000000000000000000364101316246254300147110ustar00rootroot00000000000000cmake_minimum_required(VERSION 2.6) # avoid some cmake warnings IF(POLICY CMP0026) CMAKE_POLICY(SET CMP0026 OLD) ENDIF() SET(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH}) SET(CMAKE_LIBRARY_PATH /usr/lib/x86_64-linux-gnu/ ${CMAKE_LIBRARY_PATH}) # Can be compiled standalone IF(NOT TH_INSTALL_BIN_SUBDIR OR NOT TH_INSTALL_LIB_SUBDIR OR NOT TH_INSTALL_INCLUDE_SUBDIR OR NOT TH_INSTALL_CMAKE_SUBDIR) SET(TH_INSTALL_BIN_SUBDIR "bin" CACHE PATH "TH install binary subdirectory") SET(TH_INSTALL_LIB_SUBDIR "lib" CACHE PATH "TH install library subdirectory") SET(TH_INSTALL_INCLUDE_SUBDIR "include" CACHE PATH "TH install include subdirectory") SET(TH_INSTALL_CMAKE_SUBDIR "share/cmake/TH" CACHE PATH "TH install cmake subdirectory") ENDIF() ###################################################################### ###### macros section ##################################################################### IF(NOT ADD_TORCH_LIBRARY) MACRO(ADD_TORCH_LIBRARY package type src) IF ("${type}" STREQUAL "STATIC") if ("${src}" MATCHES "cu$" OR "${src}" MATCHES "cu;") CUDA_ADD_LIBRARY(${package} STATIC ${src}) else() ADD_LIBRARY(${package} STATIC ${src}) endif() ELSE() if ("${src}" MATCHES "cu$" OR "${src}" MATCHES "cu;") CUDA_ADD_LIBRARY(${package} ${type} ${src}) else() ADD_LIBRARY(${package} ${type} ${src}) endif() ENDIF() ENDMACRO() ENDIF() ####################################################################### ##### flags section ###################################################################### IF(MSVC) # MSVC now supports C99 since VS2013/VS2015, however the standard version 
switch is not provided yet # SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /std:c99") ELSE(MSVC) # enable gnu99 and not c99 because we use # gnu extensions like posix_memalign SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu99") ENDIF(MSVC) IF(MSVC) ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE=1) # respect the standard ENDIF(MSVC) IF(UNIX) # prevent Unknown CMake command "check_function_exists". INCLUDE(CheckFunctionExists) ENDIF(UNIX) # OpenMP support? SET(WITH_OPENMP ON CACHE BOOL "OpenMP support if available?") IF (APPLE AND CMAKE_COMPILER_IS_GNUCC) EXEC_PROGRAM (uname ARGS -v OUTPUT_VARIABLE DARWIN_VERSION) STRING (REGEX MATCH "[0-9]+" DARWIN_VERSION ${DARWIN_VERSION}) MESSAGE (STATUS "MAC OS Darwin Version: ${DARWIN_VERSION}") IF (DARWIN_VERSION GREATER 9) SET(APPLE_OPENMP_SUCKS 1) ENDIF (DARWIN_VERSION GREATER 9) EXECUTE_PROCESS (COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) IF (APPLE_OPENMP_SUCKS AND GCC_VERSION VERSION_LESS 4.6.2) MESSAGE(STATUS "Warning: Disabling OpenMP (unstable with this version of GCC)") MESSAGE(STATUS " Install GCC >= 4.6.2 or change your OS to enable OpenMP") SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unknown-pragmas") SET(WITH_OPENMP OFF CACHE BOOL "OpenMP support if available?" 
FORCE) ENDIF () ENDIF () IF (WITH_OPENMP) FIND_PACKAGE(OpenMP) IF(OPENMP_FOUND) MESSAGE(STATUS "Compiling with OpenMP support") SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") ENDIF(OPENMP_FOUND) ENDIF (WITH_OPENMP) # ARM specific flags FIND_PACKAGE(ARM) IF (ASIMD_FOUND) MESSAGE(STATUS "asimd/Neon found with compiler flag : -D__NEON__") SET(CMAKE_C_FLAGS "-D__NEON__ ${CMAKE_C_FLAGS}") ELSEIF (NEON_FOUND) MESSAGE(STATUS "Neon found with compiler flag : -mfpu=neon -D__NEON__") SET(CMAKE_C_FLAGS "-mfpu=neon -D__NEON__ ${CMAKE_C_FLAGS}") ENDIF (ASIMD_FOUND) IF (CORTEXA8_FOUND) MESSAGE(STATUS "Cortex-A8 Found with compiler flag : -mcpu=cortex-a8") SET(CMAKE_C_FLAGS "-mcpu=cortex-a8 -fprefetch-loop-arrays ${CMAKE_C_FLAGS}") ENDIF (CORTEXA8_FOUND) IF (CORTEXA9_FOUND) MESSAGE(STATUS "Cortex-A9 Found with compiler flag : -mcpu=cortex-a9") SET(CMAKE_C_FLAGS "-mcpu=cortex-a9 ${CMAKE_C_FLAGS}") ENDIF (CORTEXA9_FOUND) INCLUDE (CheckIncludeFile) INCLUDE (CheckCSourceCompiles) CHECK_INCLUDE_FILE(cpuid.h HAVE_CPUID_H) # Check for a cpuid intrinsic IF(HAVE_CPUID_H) CHECK_C_SOURCE_COMPILES("#include int main() { unsigned int eax, ebx, ecx, edx; return __get_cpuid(0, &eax, &ebx, &ecx, &edx); }" HAVE_GCC_GET_CPUID) ENDIF() IF(HAVE_GCC_GET_CPUID) SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DHAVE_GCC_GET_CPUID") ENDIF(HAVE_GCC_GET_CPUID) CHECK_C_SOURCE_COMPILES("#include static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { uint32_t a = *eax, b, c = *ecx, d; asm volatile ( \"cpuid\" : \"+a\"(a), \"=b\"(b), \"+c\"(c), \"=d\"(d) ); *eax = a; *ebx = b; *ecx = c; *edx = d; } int main() { uint32_t a,b,c,d; cpuid(&a, &b, &c, &d); return 0; }" NO_GCC_EBX_FPIC_BUG) IF(NOT NO_GCC_EBX_FPIC_BUG) SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DUSE_GCC_GET_CPUID") ENDIF(NOT NO_GCC_EBX_FPIC_BUG) FIND_PACKAGE(SSE) # checks SSE, AVX and 
AVX2 IF(C_SSE2_FOUND) MESSAGE(STATUS "SSE2 Found") SET(CMAKE_C_FLAGS "${C_SSE2_FLAGS} -DUSE_SSE2 ${CMAKE_C_FLAGS}") ENDIF(C_SSE2_FOUND) IF(C_SSE3_FOUND) MESSAGE(STATUS "SSE3 Found") SET(CMAKE_C_FLAGS "${C_SSE3_FLAGS} -DUSE_SSE3 ${CMAKE_C_FLAGS}") ENDIF(C_SSE3_FOUND) # we don't set -mavx and -mavx2 flags globally, but only for specific files # however, we want to enable the AVX codepaths, so we still need to # add USE_AVX and USE_AVX2 macro defines IF(C_AVX_FOUND) MESSAGE(STATUS "AVX Found") SET(CMAKE_C_FLAGS "-DUSE_AVX ${CMAKE_C_FLAGS}") ENDIF(C_AVX_FOUND) IF(C_AVX2_FOUND) MESSAGE(STATUS "AVX2 Found") SET(CMAKE_C_FLAGS "-DUSE_AVX2 ${CMAKE_C_FLAGS}") ENDIF(C_AVX2_FOUND) CHECK_C_SOURCE_RUNS(" #include int main() { int a; int oa; atomic_store(&a, 1); atomic_fetch_add(&a, 1); oa = atomic_load(&a); if(!atomic_compare_exchange_strong(&a, &oa, 3)) return -1; return 0; } " HAS_C11_ATOMICS) IF(NOT HAS_C11_ATOMICS) CHECK_C_SOURCE_RUNS(" #include int main() { long a; _InterlockedExchange(&a, 1); _InterlockedExchangeAdd(&a, 1); if(_InterlockedCompareExchange(&a, 3, 2) != 2) return -1; return 0; } " HAS_MSC_ATOMICS) CHECK_C_SOURCE_RUNS(" int main() { int a; __sync_lock_test_and_set(&a, 1); __sync_fetch_and_add(&a, 1); if(!__sync_bool_compare_and_swap(&a, 2, 3)) return -1; return 0; } " HAS_GCC_ATOMICS) ENDIF() ####################################################################### ##### sources section ###################################################################### # IF ANY SIMD FOUND IF(C_AVX2_FOUND OR C_AVX_FOUND OR C_SSE4_2_FOUND OR C_SSE4_1_FOUND) SET(simd generic/simd/convolve.c) ENDIF(C_AVX2_FOUND OR C_AVX_FOUND OR C_SSE4_2_FOUND OR C_SSE4_1_FOUND) # IF SSE4 FOUND IF(C_SSE4_1_FOUND AND C_SSE4_2_FOUND) SET(CMAKE_C_FLAGS "${C_SSE4_1_FLAGS} -DUSE_SSE4_1 ${C_SSE4_2_FLAGS} -DUSE_SSE4_2 ${CMAKE_C_FLAGS}") IF(MSVC) SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_sse.c PROPERTIES COMPILE_FLAGS "/Ox /fp:fast") ELSE(MSVC) 
SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_sse.c PROPERTIES COMPILE_FLAGS "-O3 -ffast-math") ENDIF(MSVC) SET(simd ${simd} generic/simd/convolve5x5_sse.c) ENDIF(C_SSE4_1_FOUND AND C_SSE4_2_FOUND) # IF AVX FOUND IF(C_AVX_FOUND) IF(MSVC) SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_avx.c PROPERTIES COMPILE_FLAGS "/Ox /fp:fast ${C_AVX_FLAGS}") SET_SOURCE_FILES_PROPERTIES(vector/AVX.c PROPERTIES COMPILE_FLAGS "/Ox /arch:AVX ${C_AVX_FLAGS}") ELSE(MSVC) SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_avx.c PROPERTIES COMPILE_FLAGS "-O3 -ffast-math ${C_AVX_FLAGS}") SET_SOURCE_FILES_PROPERTIES(vector/AVX.c PROPERTIES COMPILE_FLAGS "-O3 ${C_AVX_FLAGS}") ENDIF(MSVC) SET(simd ${simd} vector/AVX.c generic/simd/convolve5x5_avx.c) ENDIF(C_AVX_FOUND) IF(C_AVX2_FOUND) IF(MSVC) SET_SOURCE_FILES_PROPERTIES(vector/AVX2.c PROPERTIES COMPILE_FLAGS "/Ox /arch:AVX2 ${C_AVX2_FLAGS}") ELSE(MSVC) SET_SOURCE_FILES_PROPERTIES(vector/AVX2.c PROPERTIES COMPILE_FLAGS "-O3 ${C_AVX2_FLAGS}") ENDIF(MSVC) SET(simd ${simd} vector/AVX2.c) ENDIF(C_AVX2_FOUND) SET(hdr THGeneral.h THHalf.h THAllocator.h THSize.h THStorage.h THTensor.h THTensorApply.h THBlas.h THMath.h THLapack.h THLogAdd.h THRandom.h THVector.h THAtomic.h ) SET(src THGeneral.c THHalf.c THAllocator.c THSize.c THStorage.c THTensor.c THBlas.c THLapack.c THLogAdd.c THRandom.c THFile.c THDiskFile.c THMemoryFile.c THAtomic.c THVector.c) SET(src ${src} ${hdr} ${simd}) ####################################################################### ##### build section ###################################################################### ADD_TORCH_LIBRARY(TH SHARED "${src}") IF (BUILD_STATIC OR "$ENV{STATIC_TH}" STREQUAL "YES") ADD_TORCH_LIBRARY(TH_static STATIC "${src}") SET_TARGET_PROPERTIES(TH_static PROPERTIES COMPILE_FLAGS "-fPIC") SET_TARGET_PROPERTIES(TH_static PROPERTIES PREFIX "lib" IMPORT_PREFIX "lib" OUTPUT_NAME "TH") ENDIF() IF(NOT TH_SO_VERSION) SET(TH_SO_VERSION 0) ENDIF(NOT TH_SO_VERSION) MESSAGE(STATUS 
"TH_SO_VERSION: ${TH_SO_VERSION}") SET_TARGET_PROPERTIES(TH PROPERTIES VERSION ${TH_SO_VERSION} SOVERSION ${TH_SO_VERSION}) IF(HAS_C11_ATOMICS) ADD_DEFINITIONS(-DUSE_C11_ATOMICS=1) MESSAGE(STATUS "Atomics: using C11 intrinsics") ELSEIF(HAS_MSC_ATOMICS) ADD_DEFINITIONS(-DUSE_MSC_ATOMICS=1) MESSAGE(STATUS "Atomics: using MSVC intrinsics") ELSEIF(HAS_GCC_ATOMICS) ADD_DEFINITIONS(-DUSE_GCC_ATOMICS=1) MESSAGE(STATUS "Atomics: using GCC intrinsics") ELSE() SET(CMAKE_THREAD_PREFER_PTHREAD TRUE) FIND_PACKAGE(Threads) IF(THREADS_FOUND) ADD_DEFINITIONS(-DUSE_PTHREAD_ATOMICS=1) TARGET_LINK_LIBRARIES(TH ${CMAKE_THREAD_LIBS_INIT}) MESSAGE(STATUS "Atomics: using pthread") ENDIF() ENDIF() FIND_PACKAGE(BLAS) IF(BLAS_FOUND) SET(USE_BLAS 1) IF ($ENV{TH_BINARY_BUILD}) MESSAGE(STATUS "TH_BINARY_BUILD detected. Enabling special linkage.") TARGET_LINK_LIBRARIES(TH "${BLAS_LIBRARIES};${BLAS_LIBRARIES};${BLAS_LIBRARIES}") ELSE ($ENV{TH_BINARY_BUILD}) TARGET_LINK_LIBRARIES(TH ${BLAS_LIBRARIES}) ENDIF ($ENV{TH_BINARY_BUILD}) IF(BLAS_INFO STREQUAL "mkl") ADD_DEFINITIONS(-DTH_BLAS_MKL) ENDIF() ENDIF(BLAS_FOUND) FIND_PACKAGE(LAPACK) IF(LAPACK_FOUND) SET(USE_LAPACK 1) TARGET_LINK_LIBRARIES(TH ${LAPACK_LIBRARIES}) ENDIF(LAPACK_FOUND) IF (UNIX AND NOT APPLE) INCLUDE(CheckLibraryExists) # https://github.com/libgit2/libgit2/issues/2128#issuecomment-35649830 CHECK_LIBRARY_EXISTS(rt clock_gettime "time.h" NEED_LIBRT) IF(NEED_LIBRT) TARGET_LINK_LIBRARIES(TH rt) SET(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES} rt) ENDIF(NEED_LIBRT) ENDIF(UNIX AND NOT APPLE) IF(UNIX) SET(CMAKE_EXTRA_INCLUDE_FILES "sys/mman.h") CHECK_FUNCTION_EXISTS(mmap HAVE_MMAP) IF(HAVE_MMAP) ADD_DEFINITIONS(-DHAVE_MMAP=1) ENDIF(HAVE_MMAP) # done for lseek: https://www.gnu.org/software/libc/manual/html_node/File-Position-Primitive.html ADD_DEFINITIONS(-D_FILE_OFFSET_BITS=64) CHECK_FUNCTION_EXISTS(shm_open HAVE_SHM_OPEN) IF(HAVE_SHM_OPEN) ADD_DEFINITIONS(-DHAVE_SHM_OPEN=1) ENDIF(HAVE_SHM_OPEN) CHECK_FUNCTION_EXISTS(shm_unlink 
HAVE_SHM_UNLINK) IF(HAVE_SHM_UNLINK) ADD_DEFINITIONS(-DHAVE_SHM_UNLINK=1) ENDIF(HAVE_SHM_UNLINK) CHECK_FUNCTION_EXISTS(malloc_usable_size HAVE_MALLOC_USABLE_SIZE) IF(HAVE_MALLOC_USABLE_SIZE) ADD_DEFINITIONS(-DHAVE_MALLOC_USABLE_SIZE=1) ENDIF(HAVE_MALLOC_USABLE_SIZE) ENDIF(UNIX) IF(NOT MSVC) TARGET_LINK_LIBRARIES(TH m) ENDIF(NOT MSVC) # Is __thread supported? IF(NOT MSVC) CHECK_C_SOURCE_COMPILES("static __thread int x = 1; int main() { return x; }" C_HAS_THREAD) ELSE(NOT MSVC) CHECK_C_SOURCE_COMPILES("static __declspec( thread ) int x = 1; int main() { return x; }" C_HAS_THREAD) ENDIF(NOT MSVC) IF(NOT C_HAS_THREAD) MESSAGE(STATUS "Warning: __thread is not supported, generating thread-unsafe code") ELSE(NOT C_HAS_THREAD) SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DTH_HAVE_THREAD") ENDIF(NOT C_HAS_THREAD) INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}") CONFIGURE_FILE(THGeneral.h.in "${CMAKE_CURRENT_BINARY_DIR}/THGeneral.h") ####################################################################### ##### install section ###################################################################### INSTALL(TARGETS TH EXPORT TH-exports RUNTIME DESTINATION "${TH_INSTALL_BIN_SUBDIR}" LIBRARY DESTINATION "${TH_INSTALL_LIB_SUBDIR}" ARCHIVE DESTINATION "${TH_INSTALL_LIB_SUBDIR}") INSTALL(FILES TH.h THAllocator.h THMath.h THBlas.h THDiskFile.h THFile.h THFilePrivate.h ${CMAKE_CURRENT_BINARY_DIR}/THGeneral.h THGenerateAllTypes.h THGenerateDoubleType.h THGenerateFloatType.h THGenerateHalfType.h THGenerateLongType.h THGenerateIntType.h THGenerateShortType.h THGenerateCharType.h THGenerateByteType.h THGenerateFloatTypes.h THGenerateIntTypes.h THLapack.h THLogAdd.h THMemoryFile.h THRandom.h THSize.h THStorage.h THTensor.h THTensorApply.h THTensorDimApply.h THTensorMacros.h THVector.h THAtomic.h THHalf.h DESTINATION "${TH_INSTALL_INCLUDE_SUBDIR}/TH") INSTALL(FILES vector/AVX.h vector/AVX2.h DESTINATION "${TH_INSTALL_INCLUDE_SUBDIR}/TH/vector") INSTALL(FILES generic/THBlas.c generic/THBlas.h 
generic/THLapack.c generic/THLapack.h generic/THStorage.c generic/THStorage.h generic/THStorageCopy.c generic/THStorageCopy.h generic/THTensor.c generic/THTensor.h generic/THTensorConv.c generic/THTensorConv.h generic/THTensorCopy.c generic/THTensorCopy.h generic/THTensorLapack.c generic/THTensorLapack.h generic/THTensorMath.c generic/THTensorMath.h generic/THTensorRandom.c generic/THTensorRandom.h generic/THVectorDispatch.c generic/THVector.h DESTINATION "${TH_INSTALL_INCLUDE_SUBDIR}/TH/generic") IF (WIN32 AND NOT CYGWIN) SET(BLAS_INSTALL_LIBRARIES "OFF" CACHE BOOL "Copy the required BLAS DLLs into the TH install dirs") ENDIF (WIN32 AND NOT CYGWIN) MACRO(Install_Required_Library ln) get_filename_component(libpath ${ln} PATH) get_filename_component(libname ${ln} NAME_WE) file(GLOB libdlls "${libpath}/${libname}*.dll") install(PROGRAMS ${libdlls} DESTINATION "${TH_INSTALL_BIN_SUBDIR}") ENDMACRO(Install_Required_Library libname) IF (BLAS_FOUND AND BLAS_INSTALL_LIBRARIES) IF (BLAS_goto2_LIBRARY) Install_Required_Library(${BLAS_goto2_LIBRARY}) Install_Required_Library("${libpath}/libgfortran") Install_Required_Library("${libpath}/libquadmath") Install_Required_Library("${libpath}/libgcc") ENDIF() IF (BLAS_openblas_LIBRARY) Install_Required_Library(${BLAS_openblas_LIBRARY}) Install_Required_Library("${libpath}/libquadmath") Install_Required_Library("${libpath}/libgfortran") Install_Required_Library("${libpath}/libgcc") ENDIF() ENDIF() # Create THConfig.cmake GET_TARGET_PROPERTY(TH_OUTPUT_NAME TH LOCATION) GET_FILENAME_COMPONENT(TH_OUTPUT_NAME ${TH_OUTPUT_NAME} NAME) SET(TH_LIBRARIES "${CMAKE_INSTALL_PREFIX}/${TH_INSTALL_LIB_SUBDIR}/${TH_OUTPUT_NAME}") SET(TH_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${TH_INSTALL_INCLUDE_SUBDIR}/TH") CONFIGURE_FILE(THConfig.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/THConfig.cmake") INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/THConfig.cmake" DESTINATION "${TH_INSTALL_CMAKE_SUBDIR}") 
lib/TH/README.md000066400000000000000000000004211316246254300134210ustar00rootroot00000000000000Environment variables control the disabling of certain explicit SIMD optimizations. ``` x64 options: TH_NO_AVX2=1 # disable AVX2 codepaths TH_NO_AVX=1 # disable AVX codepaths TH_NO_SSE=1 # disable SSE codepaths ppc64le options: TH_NO_VSX=1 # disable VSX codepaths ``` lib/TH/TH.h000066400000000000000000000006341316246254300126340ustar00rootroot00000000000000#ifndef TH_INC #define TH_INC #include "THGeneral.h" #include "THBlas.h" #ifdef USE_LAPACK #include "THLapack.h" #endif #include "THAtomic.h" #include "THVector.h" #include "THLogAdd.h" #include "THRandom.h" #include "THSize.h" #include "THStorage.h" #include "THTensor.h" #include "THTensorApply.h" #include "THTensorDimApply.h" #include "THFile.h" #include "THDiskFile.h" #include "THMemoryFile.h" #endif lib/TH/THAllocator.c000066400000000000000000000344171316246254300144760ustar00rootroot00000000000000#include "THAllocator.h" #include "THAtomic.h" /* stuff for mapped files */ #ifdef _WIN32 #include #endif #if HAVE_MMAP #include #include #include #include #include #endif /* end of stuff for mapped files */ static void *THDefaultAllocator_alloc(void* ctx, ptrdiff_t size) { return THAlloc(size); } static void *THDefaultAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) { return THRealloc(ptr, size); } static void THDefaultAllocator_free(void* ctx, void* ptr) { THFree(ptr); } THAllocator THDefaultAllocator = { &THDefaultAllocator_alloc, &THDefaultAllocator_realloc, &THDefaultAllocator_free }; #if defined(_WIN32) || defined(HAVE_MMAP) struct THMapAllocatorContext_ { char *filename; /* file name */ int flags; ptrdiff_t size; /* mapped size */ int fd; }; #define TH_ALLOC_ALIGNMENT 64 typedef struct { int refcount; } THMapInfo; char * unknown_filename = "filename not specified"; THMapAllocatorContext *THMapAllocatorContext_new(const char *filename, int flags) { THMapAllocatorContext *ctx = 
THAlloc(sizeof(THMapAllocatorContext)); if (!(flags & TH_ALLOCATOR_MAPPED_SHARED) && !(flags & TH_ALLOCATOR_MAPPED_SHAREDMEM)) flags &= ~TH_ALLOCATOR_MAPPED_NOCREATE; if ((flags ^ TH_ALLOCATOR_MAPPED_EXCLUSIVE) == 0) THError("TH_ALLOCATOR_MAPPED_EXCLUSIVE flag requires opening the file " "in shared mode"); if (filename) { ctx->filename = THAlloc(strlen(filename)+1); strcpy(ctx->filename, filename); } else { ctx->filename = unknown_filename; } ctx->flags = flags; ctx->size = 0; ctx->fd = -1; return ctx; } THMapAllocatorContext *THMapAllocatorContext_newWithFd(const char *filename, int fd, int flags) { THMapAllocatorContext *ctx = THMapAllocatorContext_new(filename, flags); ctx->fd = fd; return ctx; } char * THMapAllocatorContext_filename(THMapAllocatorContext *ctx) { return ctx->filename; } int THMapAllocatorContext_fd(THMapAllocatorContext *ctx) { return ctx->fd; } ptrdiff_t THMapAllocatorContext_size(THMapAllocatorContext *ctx) { return ctx->size; } void THMapAllocatorContext_free(THMapAllocatorContext *ctx) { if (ctx->filename != unknown_filename) THFree(ctx->filename); THFree(ctx); } static void *_map_alloc(void* ctx_, ptrdiff_t size) { THMapAllocatorContext *ctx = ctx_; void *data = NULL; #ifdef _WIN32 { HANDLE hfile; HANDLE hmfile; LARGE_INTEGER hfilesz; if (ctx->flags & TH_ALLOCATOR_MAPPED_EXCLUSIVE) THError("exclusive file mapping is not supported on Windows"); if (ctx->flags & TH_ALLOCATOR_MAPPED_NOCREATE) THError("file mapping without creation is not supported on Windows"); if (ctx->flags & TH_ALLOCATOR_MAPPED_KEEPFD) THError("TH_ALLOCATOR_MAPPED_KEEPFD not supported on Windows"); if (ctx->flags & TH_ALLOCATOR_MAPPED_FROMFD) THError("TH_ALLOCATOR_MAPPED_FROMFD not supported on Windows"); /* open file */ /* FILE_FLAG_RANDOM_ACCESS ? 
*/ if(ctx->flags) { hfile = CreateFileA(ctx->filename, GENERIC_READ|GENERIC_WRITE, FILE_SHARE_WRITE|FILE_SHARE_READ, 0, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0); if (hfile == INVALID_HANDLE_VALUE) THError("could not open file <%s> in read-write mode; error code: <%d>", ctx->filename, GetLastError()); } else { hfile = CreateFileA(ctx->filename, GENERIC_READ, FILE_SHARE_WRITE|FILE_SHARE_READ, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); if (hfile == INVALID_HANDLE_VALUE) THError("could not open file <%s> in read-only mode; error code: <%d>", ctx->filename, GetLastError()); } if (GetFileSizeEx(hfile, &hfilesz) == 0) { THError("could not get file size: <%s>; error code: <%d>", ctx->filename, GetLastError()); } if(size > 0) { if(size > hfilesz.QuadPart) { if(ctx->flags) { hfilesz.QuadPart = size; if(SetFilePointerEx(hfile, hfilesz, NULL, FILE_BEGIN) == 0) { CloseHandle(hfile); THError("unable to stretch file <%s> to the right size; error code: <%d>", ctx->filename, GetLastError()); } if(SetEndOfFile(hfile) == 0) { CloseHandle(hfile); THError("unable to write to file <%s>; error code: <%d>", ctx->filename, GetLastError()); } } else { CloseHandle(hfile); THError("file <%s> size is smaller than the required mapping size <%ld>; error code: <%d>", ctx->filename, size, GetLastError()); } } } else size = hfilesz.QuadPart; ctx->size = size; /* if we are here, it must be the right size */ hfilesz.QuadPart = ctx->size; /* get map handle */ if(ctx->flags) { if( (hmfile = CreateFileMapping(hfile, NULL, PAGE_READWRITE, hfilesz.HighPart, hfilesz.LowPart, NULL)) == NULL ) THError("could not create a map on file <%s>; error code: <%d>", ctx->filename, GetLastError()); } else { if( (hmfile = CreateFileMapping(hfile, NULL, PAGE_WRITECOPY, hfilesz.HighPart, hfilesz.LowPart, NULL)) == NULL ) THError("could not create a map on file <%s>; error code: <%d>", ctx->filename, GetLastError()); } /* map the stuff */ if(ctx->flags) data = MapViewOfFile(hmfile, FILE_MAP_ALL_ACCESS, 0, 0, 0); else data 
= MapViewOfFile(hmfile, FILE_MAP_COPY, 0, 0, 0); CloseHandle(hfile); CloseHandle(hmfile); } #else /* _WIN32 */ { /* open file */ int fd; int flags; struct stat file_stat; if (ctx->flags & (TH_ALLOCATOR_MAPPED_SHARED | TH_ALLOCATOR_MAPPED_SHAREDMEM)) flags = O_RDWR | O_CREAT; else flags = O_RDONLY; if (ctx->flags & TH_ALLOCATOR_MAPPED_EXCLUSIVE) flags |= O_EXCL; if (ctx->flags & TH_ALLOCATOR_MAPPED_NOCREATE) flags &= ~O_CREAT; if (!(ctx->flags & TH_ALLOCATOR_MAPPED_FROMFD)) { if(ctx->flags & TH_ALLOCATOR_MAPPED_SHARED) { if((fd = open(ctx->filename, flags, (mode_t)0600)) == -1) THError("unable to open file <%s> in read-write mode", ctx->filename); } else if (ctx->flags & TH_ALLOCATOR_MAPPED_SHAREDMEM) { #ifdef HAVE_SHM_OPEN if((fd = shm_open(ctx->filename, flags, (mode_t)0600)) == -1) THError("unable to open shared memory object <%s> in read-write mode", ctx->filename); #else THError("unable to open file <%s> in sharedmem mode, shm_open unavailable on this platform", ctx->filename); #endif } else { if((fd = open(ctx->filename, O_RDONLY)) == -1) THError("unable to open file <%s> in read-only mode", ctx->filename); } } else { fd = ctx->fd; } if(fstat(fd, &file_stat) == -1) { if (!(ctx->flags & TH_ALLOCATOR_MAPPED_FROMFD)) close(fd); THError("unable to stat the file <%s>", ctx->filename); } if(size > 0) { if(size > file_stat.st_size) { if(ctx->flags) { if(ftruncate(fd, size) == -1) THError("unable to resize file <%s> to the right size", ctx->filename); if(fstat(fd, &file_stat) == -1 || file_stat.st_size < size) { close(fd); THError("unable to stretch file <%s> to the right size", ctx->filename); } /* on OS X write returns with errno 45 (Opperation not supported) when used * with a file descriptor obtained via shm_open */ #ifndef __APPLE__ if((write(fd, "", 1)) != 1) /* note that the string "" contains the '\0' byte ... 
*/ { close(fd); THError("unable to write to file <%s>", ctx->filename); } #endif } else { close(fd); THError("file <%s> size is smaller than the required mapping size <%ld>", ctx->filename, size); } } } else size = file_stat.st_size; ctx->size = size; /* if we are here, it must be the right size */ /* map it */ if (ctx->flags & (TH_ALLOCATOR_MAPPED_SHARED | TH_ALLOCATOR_MAPPED_SHAREDMEM)) data = mmap(NULL, ctx->size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); else data = mmap(NULL, ctx->size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); if (ctx->flags & TH_ALLOCATOR_MAPPED_KEEPFD) { ctx->fd = fd; } else { if(close(fd) == -1) THError("Error closing file <%s>", ctx->filename); ctx->fd = -1; } if (ctx->flags & TH_ALLOCATOR_MAPPED_UNLINK) { if (ctx->flags & TH_ALLOCATOR_MAPPED_SHAREDMEM) { #ifdef HAVE_SHM_UNLINK if (shm_unlink(ctx->filename) == -1) THError("could not unlink the shared memory file %s", ctx->filename); #else THError("could not unlink the shared memory file %s, shm_unlink not available on platform", ctx->filename); #endif } else { if (unlink(ctx->filename) == -1) THError("could not unlink file %s", ctx->filename); } } if(data == MAP_FAILED) { data = NULL; /* let's be sure it is NULL */ THError("$ Torch: unable to mmap memory: you tried to mmap %dGB.", ctx->size/1073741824); } } #endif return data; } static void * THMapAllocator_alloc(void *ctx, ptrdiff_t size) { return _map_alloc(ctx, size); } static void *THMapAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) { THError("cannot realloc mapped data"); return NULL; } static void THMapAllocator_free(void* ctx_, void* data) { THMapAllocatorContext *ctx = ctx_; #ifdef _WIN32 if(UnmapViewOfFile(data) == 0) THError("could not unmap the shared memory file"); #else /* _WIN32 */ if (ctx->flags & TH_ALLOCATOR_MAPPED_KEEPFD) { if (close(ctx->fd) == -1) THError("could not close file descriptor %d", ctx->fd); } if (munmap(data, ctx->size)) THError("could not unmap the shared memory file"); if (!(ctx->flags & 
(TH_ALLOCATOR_MAPPED_FROMFD | TH_ALLOCATOR_MAPPED_UNLINK))) { if (ctx->flags & TH_ALLOCATOR_MAPPED_SHAREDMEM) { #ifdef HAVE_SHM_UNLINK if (shm_unlink(ctx->filename) == -1) THError("could not unlink the shared memory file %s", ctx->filename); #else THError("could not unlink the shared memory file %s, shm_unlink not available on platform", ctx->filename); #endif } } #endif /* _WIN32 */ THMapAllocatorContext_free(ctx); } #else THMapAllocatorContext *THMapAllocatorContext_new(const char *filename, int flags) { THError("file mapping not supported on your system"); return NULL; } void THMapAllocatorContext_free(THMapAllocatorContext *ctx) { THError("file mapping not supported on your system"); } static void *THMapAllocator_alloc(void* ctx_, ptrdiff_t size) { THError("file mapping not supported on your system"); return NULL; } static void *THMapAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) { THError("file mapping not supported on your system"); return NULL; } static void THMapAllocator_free(void* ctx, void* data) { THError("file mapping not supported on your system"); } #endif #if (defined(_WIN32) || defined(HAVE_MMAP)) && defined(TH_ATOMIC_IPC_REFCOUNT) static void * THRefcountedMapAllocator_alloc(void *_ctx, ptrdiff_t size) { THMapAllocatorContext *ctx = _ctx; if (ctx->flags & TH_ALLOCATOR_MAPPED_FROMFD) THError("THRefcountedMapAllocator doesn't support TH_ALLOCATOR_MAPPED_FROMFD flag"); if (ctx->flags & TH_ALLOCATOR_MAPPED_KEEPFD) THError("THRefcountedMapAllocator doesn't support TH_ALLOCATOR_MAPPED_KEEPFD flag"); if (ctx->flags & TH_ALLOCATOR_MAPPED_UNLINK) THError("THRefcountedMapAllocator doesn't support TH_ALLOCATOR_MAPPED_UNLINK flag"); if (!(ctx->flags & TH_ALLOCATOR_MAPPED_SHAREDMEM)) THError("THRefcountedMapAllocator requires TH_ALLOCATOR_MAPPED_SHAREDMEM flag"); size = size + TH_ALLOC_ALIGNMENT; void *ptr = _map_alloc(ctx, size); char *data = ((char*)ptr) + TH_ALLOC_ALIGNMENT; THMapInfo *map_info = (THMapInfo*)ptr; if (ctx->flags & 
TH_ALLOCATOR_MAPPED_EXCLUSIVE) map_info->refcount = 1; else THAtomicIncrementRef(&map_info->refcount); return (void*)data; } static void *THRefcountedMapAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) { THError("cannot realloc mapped data"); return NULL; } static void THRefcountedMapAllocator_free(void* ctx_, void* data) { THMapAllocatorContext *ctx = ctx_; #ifdef _WIN32 if(UnmapViewOfFile(data) == 0) THError("could not unmap the shared memory file"); #else /* _WIN32 */ THMapInfo *info = (THMapInfo*)(((char*)data) - TH_ALLOC_ALIGNMENT); if (THAtomicDecrementRef(&info->refcount)) { #ifdef HAVE_SHM_UNLINK if (shm_unlink(ctx->filename) == -1) THError("could not unlink the shared memory file %s", ctx->filename); #else THError("could not unlink the shared memory file %s, shm_unlink not available on platform", ctx->filename); #endif /* HAVE_SHM_UNLINK */ } if (munmap(info, ctx->size)) THError("could not unmap the shared memory file %s", ctx->filename); #endif /* _WIN32 */ THMapAllocatorContext_free(ctx); } void THRefcountedMapAllocator_incref(THMapAllocatorContext *ctx, void *data) { THMapInfo *map_info = (THMapInfo*)(((char*)data) - TH_ALLOC_ALIGNMENT); THAtomicIncrementRef(&map_info->refcount); } int THRefcountedMapAllocator_decref(THMapAllocatorContext *ctx, void *data) { THMapInfo *map_info = (THMapInfo*)(((char*)data) - TH_ALLOC_ALIGNMENT); return THAtomicDecrementRef(&map_info->refcount); } #else static void * THRefcountedMapAllocator_alloc(void *ctx, ptrdiff_t size) { THError("refcounted file mapping not supported on your system"); return NULL; } static void *THRefcountedMapAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) { THError("refcounted file mapping not supported on your system"); return NULL; } static void THRefcountedMapAllocator_free(void* ctx_, void* data) { THError("refcounted file mapping not supported on your system"); } void THRefcountedMapAllocator_incref(THMapAllocatorContext *ctx, void *data) { THError("refcounted file mapping not 
supported on your system"); } int THRefcountedMapAllocator_decref(THMapAllocatorContext *ctx, void *data) { THError("refcounted file mapping not supported on your system"); return 0; } #endif THAllocator THMapAllocator = { &THMapAllocator_alloc, &THMapAllocator_realloc, &THMapAllocator_free }; THAllocator THRefcountedMapAllocator = { &THRefcountedMapAllocator_alloc, &THRefcountedMapAllocator_realloc, &THRefcountedMapAllocator_free }; lib/TH/THAllocator.h000066400000000000000000000027411316246254300144760ustar00rootroot00000000000000#ifndef TH_ALLOCATOR_INC #define TH_ALLOCATOR_INC #include "THGeneral.h" #define TH_ALLOCATOR_MAPPED_SHARED 1 #define TH_ALLOCATOR_MAPPED_SHAREDMEM 2 #define TH_ALLOCATOR_MAPPED_EXCLUSIVE 4 #define TH_ALLOCATOR_MAPPED_NOCREATE 8 #define TH_ALLOCATOR_MAPPED_KEEPFD 16 #define TH_ALLOCATOR_MAPPED_FROMFD 32 #define TH_ALLOCATOR_MAPPED_UNLINK 64 /* Custom allocator */ typedef struct THAllocator { void* (*malloc)(void*, ptrdiff_t); void* (*realloc)(void*, void*, ptrdiff_t); void (*free)(void*, void*); } THAllocator; /* default malloc/free allocator. malloc and realloc raise an error (using * THError) on allocation failure. 
*/ extern THAllocator THDefaultAllocator; /* file map allocator */ typedef struct THMapAllocatorContext_ THMapAllocatorContext; TH_API THMapAllocatorContext *THMapAllocatorContext_new(const char *filename, int flags); TH_API THMapAllocatorContext *THMapAllocatorContext_newWithFd(const char *filename, int fd, int flags); TH_API char * THMapAllocatorContext_filename(THMapAllocatorContext *ctx); TH_API int THMapAllocatorContext_fd(THMapAllocatorContext *ctx); TH_API ptrdiff_t THMapAllocatorContext_size(THMapAllocatorContext *ctx); TH_API void THMapAllocatorContext_free(THMapAllocatorContext *ctx); TH_API void THRefcountedMapAllocator_incref(THMapAllocatorContext *ctx, void *data); TH_API int THRefcountedMapAllocator_decref(THMapAllocatorContext *ctx, void *data); extern THAllocator THMapAllocator; extern THAllocator THRefcountedMapAllocator; #endif lib/TH/THAtomic.c000066400000000000000000000137641316246254300137740ustar00rootroot00000000000000#include "THAtomic.h" /* Note: I thank Leon Bottou for his useful comments. Ronan. 
*/ #if defined(USE_C11_ATOMICS) #include #endif #if defined(USE_MSC_ATOMICS) #include #include #endif #if !defined(USE_MSC_ATOMICS) && !defined(USE_GCC_ATOMICS) && defined(USE_PTHREAD_ATOMICS) #include static pthread_mutex_t ptm = PTHREAD_MUTEX_INITIALIZER; #endif void THAtomicSet(int volatile *a, int newvalue) { #if defined(USE_C11_ATOMICS) atomic_store(a, newvalue); #elif defined(USE_MSC_ATOMICS) assert(sizeof(int) == sizeof(long)); _InterlockedExchange((long*)a, newvalue); #elif defined(USE_GCC_ATOMICS) __sync_lock_test_and_set(a, newvalue); #else int oldvalue; do { oldvalue = *a; } while (!THAtomicCompareAndSwap(a, oldvalue, newvalue)); #endif } int THAtomicGet(int volatile *a) { #if defined(USE_C11_ATOMICS) return atomic_load(a); #else int value; do { value = *a; } while (!THAtomicCompareAndSwap(a, value, value)); return value; #endif } int THAtomicAdd(int volatile *a, int value) { #if defined(USE_C11_ATOMICS) return atomic_fetch_add(a, value); #elif defined(USE_MSC_ATOMICS) assert(sizeof(int) == sizeof(long)); return _InterlockedExchangeAdd((long*)a, value); #elif defined(USE_GCC_ATOMICS) return __sync_fetch_and_add(a, value); #else int oldvalue; do { oldvalue = *a; } while (!THAtomicCompareAndSwap(a, oldvalue, (oldvalue + value))); return oldvalue; #endif } void THAtomicIncrementRef(int volatile *a) { THAtomicAdd(a, 1); } int THAtomicDecrementRef(int volatile *a) { return (THAtomicAdd(a, -1) == 1); } int THAtomicCompareAndSwap(int volatile *a, int oldvalue, int newvalue) { #if defined(USE_C11_ATOMICS) return atomic_compare_exchange_strong(a, &oldvalue, newvalue); #elif defined(USE_MSC_ATOMICS) assert(sizeof(int) == sizeof(long)); return (_InterlockedCompareExchange((long*)a, (long)newvalue, (long)oldvalue) == (long)oldvalue); #elif defined(USE_GCC_ATOMICS) return __sync_bool_compare_and_swap(a, oldvalue, newvalue); #elif defined(USE_PTHREAD_ATOMICS) int ret = 0; pthread_mutex_lock(&ptm); if(*a == oldvalue) { *a = newvalue; ret = 1; } 
pthread_mutex_unlock(&ptm); return ret; #else #warning THAtomic is not thread safe if(*a == oldvalue) { *a = newvalue; return 1; } else return 0; #endif } void THAtomicSetLong(long volatile *a, long newvalue) { #if defined(USE_C11_ATOMICS) atomic_store(a, newvalue); #elif defined(USE_MSC_ATOMICS) _InterlockedExchange(a, newvalue); #elif defined(USE_GCC_ATOMICS) __sync_lock_test_and_set(a, newvalue); #else long oldvalue; do { oldvalue = *a; } while (!THAtomicCompareAndSwapLong(a, oldvalue, newvalue)); #endif } long THAtomicGetLong(long volatile *a) { #if defined(USE_C11_ATOMICS) return atomic_load(a); #else long value; do { value = *a; } while (!THAtomicCompareAndSwapLong(a, value, value)); return value; #endif } long THAtomicAddLong(long volatile *a, long value) { #if defined(USE_C11_ATOMICS) return atomic_fetch_add(a, value); #elif defined(USE_MSC_ATOMICS) return _InterlockedExchangeAdd(a, value); #elif defined(USE_GCC_ATOMICS) return __sync_fetch_and_add(a, value); #else long oldvalue; do { oldvalue = *a; } while (!THAtomicCompareAndSwapLong(a, oldvalue, (oldvalue + value))); return oldvalue; #endif } long THAtomicCompareAndSwapLong(long volatile *a, long oldvalue, long newvalue) { #if defined(USE_C11_ATOMICS) return atomic_compare_exchange_strong(a, &oldvalue, newvalue); #elif defined(USE_MSC_ATOMICS) return (_InterlockedCompareExchange(a, newvalue, oldvalue) == oldvalue); #elif defined(USE_GCC_ATOMICS) return __sync_bool_compare_and_swap(a, oldvalue, newvalue); #elif defined(USE_PTHREAD_ATOMICS) long ret = 0; pthread_mutex_lock(&ptm); if(*a == oldvalue) { *a = newvalue; ret = 1; } pthread_mutex_unlock(&ptm); return ret; #else #warning THAtomic is not thread safe if(*a == oldvalue) { *a = newvalue; return 1; } else return 0; #endif } void THAtomicSetPtrdiff(ptrdiff_t volatile *a, ptrdiff_t newvalue) { #if defined(USE_C11_ATOMICS) atomic_store(a, newvalue); #elif defined(USE_MSC_ATOMICS) #ifdef _WIN64 _InterlockedExchange64(a, newvalue); #else 
_InterlockedExchange(a, newvalue); #endif #elif defined(USE_GCC_ATOMICS) __sync_lock_test_and_set(a, newvalue); #else ptrdiff_t oldvalue; do { oldvalue = *a; } while (!THAtomicCompareAndSwapPtrdiff(a, oldvalue, newvalue)); #endif } ptrdiff_t THAtomicGetPtrdiff(ptrdiff_t volatile *a) { #if defined(USE_C11_ATOMICS) return atomic_load(a); #else ptrdiff_t value; do { value = *a; } while (!THAtomicCompareAndSwapPtrdiff(a, value, value)); return value; #endif } ptrdiff_t THAtomicAddPtrdiff(ptrdiff_t volatile *a, ptrdiff_t value) { #if defined(USE_C11_ATOMICS) return atomic_fetch_add(a, value); #elif defined(USE_MSC_ATOMICS) #ifdef _WIN64 return _InterlockedExchangeAdd64(a, value); #else return _InterlockedExchangeAdd(a, value); #endif #elif defined(USE_GCC_ATOMICS) return __sync_fetch_and_add(a, value); #else ptrdiff_t oldvalue; do { oldvalue = *a; } while (!THAtomicCompareAndSwapPtrdiff(a, oldvalue, (oldvalue + value))); return oldvalue; #endif } ptrdiff_t THAtomicCompareAndSwapPtrdiff(ptrdiff_t volatile *a, ptrdiff_t oldvalue, ptrdiff_t newvalue) { #if defined(USE_C11_ATOMICS) return atomic_compare_exchange_strong(a, &oldvalue, newvalue); #elif defined(USE_MSC_ATOMICS) #ifdef _WIN64 return (_InterlockedCompareExchange64(a, newvalue, oldvalue) == oldvalue); #else return (_InterlockedCompareExchange(a, newvalue, oldvalue) == oldvalue); #endif #elif defined(USE_GCC_ATOMICS) return __sync_bool_compare_and_swap(a, oldvalue, newvalue); #elif defined(USE_PTHREAD_ATOMICS) ptrdiff_t ret = 0; pthread_mutex_lock(&ptm); if(*a == oldvalue) { *a = newvalue; ret = 1; } pthread_mutex_unlock(&ptm); return ret; #else #warning THAtomic is not thread safe if(*a == oldvalue) { *a = newvalue; return 1; } else return 0; #endif } lib/TH/THAtomic.h000066400000000000000000000057161316246254300137770ustar00rootroot00000000000000#ifndef TH_ATOMIC_INC #define TH_ATOMIC_INC #include "THGeneral.h" /****************************************************************************** * Atomic operations for 
TH * Five backends are integrated: * - C11 atomic operations * - MSVC intrinsics * - GCC intrinsics * - Pthread if none of the above is available * - Unsafe mode in none of the above is available ******************************************************************************/ /****************************************************************************** * all-purpose functions ******************************************************************************/ /* * *a = newvalue */ TH_API void THAtomicSet(int volatile *a, int newvalue); /* * return *a */ TH_API int THAtomicGet(int volatile *a); /* * *a += value, * return previous *a */ TH_API int THAtomicAdd(int volatile *a, int value); /* * check if (*a == oldvalue) * if true: set *a to newvalue, return 1 * if false: return 0 */ TH_API int THAtomicCompareAndSwap(int volatile *a, int oldvalue, int newvalue); /****************************************************************************** * refcounting functions ******************************************************************************/ /* * *a++ */ TH_API void THAtomicIncrementRef(int volatile *a); /* * *a--, * return 1 if *a == 0 after the operation, 0 otherwise */ TH_API int THAtomicDecrementRef(int volatile *a); /****************************************************************************** * functions for long type ******************************************************************************/ /* * *a = newvalue */ TH_API void THAtomicSetLong(long volatile *a, long newvalue); /* * return *a */ TH_API long THAtomicGetLong(long volatile *a); /* * *a += value, * return previous *a */ TH_API long THAtomicAddLong(long volatile *a, long value); /* * check if (*a == oldvalue) * if true: set *a to newvalue, return 1 * if false: return 0 */ TH_API long THAtomicCompareAndSwapLong(long volatile *a, long oldvalue, long newvalue); /****************************************************************************** * functions for ptrdiff_t type 
******************************************************************************/ /* * *a = newvalue */ TH_API void THAtomicSetPtrdiff(ptrdiff_t volatile *a, ptrdiff_t newvalue); /* * return *a */ TH_API ptrdiff_t THAtomicGetPtrdiff(ptrdiff_t volatile *a); /* * *a += value, * return previous *a */ TH_API ptrdiff_t THAtomicAddPtrdiff(ptrdiff_t volatile *a, ptrdiff_t value); /* * check if (*a == oldvalue) * if true: set *a to newvalue, return 1 * if false: return 0 */ TH_API ptrdiff_t THAtomicCompareAndSwapPtrdiff(ptrdiff_t volatile *a, ptrdiff_t oldvalue, ptrdiff_t newvalue); #if defined(USE_C11_ATOMICS) && defined(ATOMIC_INT_LOCK_FREE) && \ ATOMIC_INT_LOCK_FREE == 2 #define TH_ATOMIC_IPC_REFCOUNT 1 #elif defined(USE_MSC_ATOMICS) || defined(USE_GCC_ATOMICS) #define TH_ATOMIC_IPC_REFCOUNT 1 #endif #endif lib/TH/THBlas.c000066400000000000000000000001211316246254300134200ustar00rootroot00000000000000#include "THBlas.h" #include "generic/THBlas.c" #include "THGenerateAllTypes.h" lib/TH/THBlas.h000066400000000000000000000002741316246254300134360ustar00rootroot00000000000000#ifndef TH_BLAS_INC #define TH_BLAS_INC #include "THGeneral.h" #define THBlas_(NAME) TH_CONCAT_4(TH,Real,Blas_,NAME) #include "generic/THBlas.h" #include "THGenerateAllTypes.h" #endif lib/TH/THConfig.cmake.in000066400000000000000000000004031316246254300152120ustar00rootroot00000000000000# Find the TH includes and library # # TH_INCLUDE_DIR -- where to find the includes # TH_LIBRARIES -- list of libraries to link against # TH_FOUND -- set to 1 if found SET(TH_FOUND 1) SET(TH_INCLUDE_DIR "@TH_INCLUDE_DIR@") SET(TH_LIBRARIES "@TH_LIBRARIES@") lib/TH/THDiskFile.c000066400000000000000000000611541316246254300142460ustar00rootroot00000000000000#include "THGeneral.h" #include "THDiskFile.h" #include "THFilePrivate.h" #include #ifndef LLONG_MAX #define LLONG_MAX 9223372036854775807LL #endif typedef struct THDiskFile__ { THFile file; FILE *handle; char *name; int isNativeEncoding; int longSize; } THDiskFile; 
static int THDiskFile_isOpened(THFile *self) { THDiskFile *dfself = (THDiskFile*)self; return (dfself->handle != NULL); } const char *THDiskFile_name(THFile *self) { THDiskFile *dfself = (THDiskFile*)self; return dfself->name; } /* workaround mac osx lion ***insane*** fread bug */ #ifdef __APPLE__ size_t fread__(void *ptr, size_t size, size_t nitems, FILE *stream) { size_t nread = 0; while(!feof(stream) && !ferror(stream) && (nread < nitems)) nread += fread((char*)ptr+nread*size, size, THMin(2147483648/size, nitems-nread), stream); return nread; } #else #define fread__ fread #endif #define READ_WRITE_METHODS(TYPE, TYPEC, ASCII_READ_ELEM, ASCII_WRITE_ELEM) \ static size_t THDiskFile_read##TYPEC(THFile *self, TYPE *data, size_t n) \ { \ THDiskFile *dfself = (THDiskFile*)(self); \ size_t nread = 0L; \ \ THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); \ THArgCheck(dfself->file.isReadable, 1, "attempt to read in a write-only file"); \ \ if(dfself->file.isBinary) \ { \ nread = fread__(data, sizeof(TYPE), n, dfself->handle); \ if(!dfself->isNativeEncoding && (sizeof(TYPE) > 1) && (nread > 0)) \ THDiskFile_reverseMemory(data, data, sizeof(TYPE), nread); \ } \ else \ { \ size_t i; \ for(i = 0; i < n; i++) \ { \ ASCII_READ_ELEM; /* increment here result and break if wrong */ \ } \ if(dfself->file.isAutoSpacing && (n > 0)) \ { \ int c = fgetc(dfself->handle); \ if( (c != '\n') && (c != EOF) ) \ ungetc(c, dfself->handle); \ } \ } \ \ if(nread != n) \ { \ dfself->file.hasError = 1; /* shouldn't we put hasError to 0 all the time ? 
*/ \ if(!dfself->file.isQuiet) \ THError("read error: read %d blocks instead of %d", nread, n); \ } \ \ return nread; \ } \ \ static size_t THDiskFile_write##TYPEC(THFile *self, TYPE *data, size_t n) \ { \ THDiskFile *dfself = (THDiskFile*)(self); \ size_t nwrite = 0L; \ \ THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); \ THArgCheck(dfself->file.isWritable, 1, "attempt to write in a read-only file"); \ \ if(dfself->file.isBinary) \ { \ if(dfself->isNativeEncoding) \ { \ nwrite = fwrite(data, sizeof(TYPE), n, dfself->handle); \ } \ else \ { \ if(sizeof(TYPE) > 1) \ { \ char *buffer = THAlloc(sizeof(TYPE)*n); \ THDiskFile_reverseMemory(buffer, data, sizeof(TYPE), n); \ nwrite = fwrite(buffer, sizeof(TYPE), n, dfself->handle); \ THFree(buffer); \ } \ else \ nwrite = fwrite(data, sizeof(TYPE), n, dfself->handle); \ } \ } \ else \ { \ size_t i; \ for(i = 0; i < n; i++) \ { \ ASCII_WRITE_ELEM; \ if( dfself->file.isAutoSpacing && (i < n-1) ) \ fprintf(dfself->handle, " "); \ } \ if(dfself->file.isAutoSpacing && (n > 0)) \ fprintf(dfself->handle, "\n"); \ } \ \ if(nwrite != n) \ { \ dfself->file.hasError = 1; \ if(!dfself->file.isQuiet) \ THError("write error: wrote %d blocks instead of %d", nwrite, n); \ } \ \ return nwrite; \ } static int THDiskFile_mode(const char *mode, int *isReadable, int *isWritable) { *isReadable = 0; *isWritable = 0; if(strlen(mode) == 1) { if(*mode == 'r') { *isReadable = 1; return 1; } else if(*mode == 'w') { *isWritable = 1; return 1; } } else if(strlen(mode) == 2) { if(mode[0] == 'r' && mode[1] == 'w') { *isReadable = 1; *isWritable = 1; return 1; } } return 0; } static void THDiskFile_synchronize(THFile *self) { THDiskFile *dfself = (THDiskFile*)(self); THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); fflush(dfself->handle); } static void THDiskFile_seek(THFile *self, size_t position) { THDiskFile *dfself = (THDiskFile*)(self); THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); 
#if defined(_WIN64) THArgCheck(position <= (size_t)INT64_MAX, 2, "position must be smaller than INT64_MAX"); if(_fseeki64(dfself->handle, (__int64)position, SEEK_SET) < 0) #elif defined(_WIN32) THArgCheck(position <= (size_t)LONG_MAX, 2, "position must be smaller than LONG_MAX"); if(fseek(dfself->handle, (long)position, SEEK_SET) < 0) #else THArgCheck(position <= (size_t)LLONG_MAX, 2, "position must be smaller than LLONG_MAX"); if(fseeko(dfself->handle, (off_t)position, SEEK_SET) < 0) #endif { dfself->file.hasError = 1; if(!dfself->file.isQuiet) THError("unable to seek to position %zu", position); } } static void THDiskFile_seekEnd(THFile *self) { THDiskFile *dfself = (THDiskFile*)(self); THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); #if defined(_WIN64) if(_fseeki64(dfself->handle, 0, SEEK_END) < 0) #elif defined(_WIN32) if(fseek(dfself->handle, 0, SEEK_END) < 0) #else if(fseeko(dfself->handle, 0, SEEK_END) < 0) #endif { dfself->file.hasError = 1; if(!dfself->file.isQuiet) THError("unable to seek at end of file"); } } static size_t THDiskFile_position(THFile *self) { THDiskFile *dfself = (THDiskFile*)(self); THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); #if defined(_WIN64) __int64 offset = _ftelli64(dfself->handle); #elif defined(_WIN32) long offset = ftell(dfself->handle); #else off_t offset = ftello(dfself->handle); #endif if (offset > -1) return (size_t)offset; else if(!dfself->file.isQuiet) THError("unable to obtain disk file offset (maybe a long overflow occurred)"); return 0; } static void THDiskFile_close(THFile *self) { THDiskFile *dfself = (THDiskFile*)(self); THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); fclose(dfself->handle); dfself->handle = NULL; } /* Little and Big Endian */ static void THDiskFile_reverseMemory(void *dst, const void *src, size_t blockSize, size_t numBlocks) { if(blockSize > 1) { size_t halfBlockSize = blockSize/2; char *charSrc = (char*)src; char *charDst = 
(char*)dst; size_t b, i; for(b = 0; b < numBlocks; b++) { for(i = 0; i < halfBlockSize; i++) { char z = charSrc[i]; charDst[i] = charSrc[blockSize-1-i]; charDst[blockSize-1-i] = z; } charSrc += blockSize; charDst += blockSize; } } } int THDiskFile_isLittleEndianCPU(void) { int x = 7; char *ptr = (char *)&x; if(ptr[0] == 0) return 0; else return 1; } int THDiskFile_isBigEndianCPU(void) { return(!THDiskFile_isLittleEndianCPU()); } void THDiskFile_nativeEndianEncoding(THFile *self) { THDiskFile *dfself = (THDiskFile*)(self); THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); dfself->isNativeEncoding = 1; } void THDiskFile_littleEndianEncoding(THFile *self) { THDiskFile *dfself = (THDiskFile*)(self); THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); dfself->isNativeEncoding = THDiskFile_isLittleEndianCPU(); } void THDiskFile_bigEndianEncoding(THFile *self) { THDiskFile *dfself = (THDiskFile*)(self); THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); dfself->isNativeEncoding = !THDiskFile_isLittleEndianCPU(); } /* End of Little and Big Endian Stuff */ void THDiskFile_longSize(THFile *self, int size) { THDiskFile *dfself = (THDiskFile*)(self); THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); THArgCheck(size == 0 || size == 4 || size == 8, 1, "Invalid long size specified"); dfself->longSize = size; } void THDiskFile_noBuffer(THFile *self) { THDiskFile *dfself = (THDiskFile*)(self); THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); if (setvbuf(dfself->handle, NULL, _IONBF, 0)) { THError("error: cannot disable buffer"); } } static void THDiskFile_free(THFile *self) { THDiskFile *dfself = (THDiskFile*)(self); if(dfself->handle) fclose(dfself->handle); THFree(dfself->name); THFree(dfself); } /* READ_WRITE_METHODS(int, Bool, */ /* int value = 0; int ret = fscanf(file->handle, "%d", &value); array[i] = (value ? 
1 : 0); if(ret <= 0) break; else result++, */ /* int value = (array[i] ? 1 : 0); nElemWritten = fprintf(file->handle, "%d", value), */ /* true) */ /* Note that we do a trick */ READ_WRITE_METHODS(unsigned char, Byte, nread = fread(data, 1, n, dfself->handle); break, nwrite = fwrite(data, 1, n, dfself->handle); break) READ_WRITE_METHODS(char, Char, nread = fread(data, 1, n, dfself->handle); break, nwrite = fwrite(data, 1, n, dfself->handle); break) READ_WRITE_METHODS(short, Short, int ret = fscanf(dfself->handle, "%hd", &data[i]); if(ret <= 0) break; else nread++, int ret = fprintf(dfself->handle, "%hd", data[i]); if(ret <= 0) break; else nwrite++) READ_WRITE_METHODS(int, Int, int ret = fscanf(dfself->handle, "%d", &data[i]); if(ret <= 0) break; else nread++, int ret = fprintf(dfself->handle, "%d", data[i]); if(ret <= 0) break; else nwrite++) READ_WRITE_METHODS(float, Float, int ret = fscanf(dfself->handle, "%g", &data[i]); if(ret <= 0) break; else nread++, int ret = fprintf(dfself->handle, "%.9g", data[i]); if(ret <= 0) break; else nwrite++) READ_WRITE_METHODS(THHalf, Half, float buf; int ret = fscanf(dfself->handle, "%g", &buf); if(ret <= 0) break; else { data[i]= TH_float2half(buf); nread++; }, int ret = fprintf(dfself->handle, "%.9g", TH_half2float(data[i])); if(ret <= 0) break; else nwrite++) READ_WRITE_METHODS(double, Double, int ret = fscanf(dfself->handle, "%lg", &data[i]); if(ret <= 0) break; else nread++, int ret = fprintf(dfself->handle, "%.17g", data[i]); if(ret <= 0) break; else nwrite++) /* For Long we need to rewrite everything, because of the special management of longSize */ static size_t THDiskFile_readLong(THFile *self, long *data, size_t n) { THDiskFile *dfself = (THDiskFile*)(self); size_t nread = 0L; THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); THArgCheck(dfself->file.isReadable, 1, "attempt to read in a write-only file"); if(dfself->file.isBinary) { if(dfself->longSize == 0 || dfself->longSize == sizeof(long)) { nread 
= fread__(data, sizeof(long), n, dfself->handle); if(!dfself->isNativeEncoding && (sizeof(long) > 1) && (nread > 0)) THDiskFile_reverseMemory(data, data, sizeof(long), nread); } else if(dfself->longSize == 4) { nread = fread__(data, 4, n, dfself->handle); if(!dfself->isNativeEncoding && (nread > 0)) THDiskFile_reverseMemory(data, data, 4, nread); size_t i; for(i = nread; i > 0; i--) data[i-1] = ((int *)data)[i-1]; } else /* if(dfself->longSize == 8) */ { int big_endian = !THDiskFile_isLittleEndianCPU(); int32_t *buffer = THAlloc(8*n); nread = fread__(buffer, 8, n, dfself->handle); size_t i; for(i = nread; i > 0; i--) data[i-1] = buffer[2*(i-1) + big_endian]; THFree(buffer); if(!dfself->isNativeEncoding && (nread > 0)) THDiskFile_reverseMemory(data, data, 4, nread); } } else { size_t i; for(i = 0; i < n; i++) { int ret = fscanf(dfself->handle, "%ld", &data[i]); if(ret <= 0) break; else nread++; } if(dfself->file.isAutoSpacing && (n > 0)) { int c = fgetc(dfself->handle); if( (c != '\n') && (c != EOF) ) ungetc(c, dfself->handle); } } if(nread != n) { dfself->file.hasError = 1; /* shouldn't we put hasError to 0 all the time ? 
*/ if(!dfself->file.isQuiet) THError("read error: read %d blocks instead of %d", nread, n); } return nread; } static size_t THDiskFile_writeLong(THFile *self, long *data, size_t n) { THDiskFile *dfself = (THDiskFile*)(self); size_t nwrite = 0L; THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); THArgCheck(dfself->file.isWritable, 1, "attempt to write in a read-only file"); if(dfself->file.isBinary) { if(dfself->longSize == 0 || dfself->longSize == sizeof(long)) { if(dfself->isNativeEncoding) { nwrite = fwrite(data, sizeof(long), n, dfself->handle); } else { char *buffer = THAlloc(sizeof(long)*n); THDiskFile_reverseMemory(buffer, data, sizeof(long), n); nwrite = fwrite(buffer, sizeof(long), n, dfself->handle); THFree(buffer); } } else if(dfself->longSize == 4) { int32_t *buffer = THAlloc(4*n); size_t i; for(i = 0; i < n; i++) buffer[i] = data[i]; if(!dfself->isNativeEncoding) THDiskFile_reverseMemory(buffer, buffer, 4, n); nwrite = fwrite(buffer, 4, n, dfself->handle); THFree(buffer); } else /* if(dfself->longSize == 8) */ { int big_endian = !THDiskFile_isLittleEndianCPU(); int32_t *buffer = THAlloc(8*n); size_t i; for(i = 0; i < n; i++) { buffer[2*i + !big_endian] = 0; buffer[2*i + big_endian] = data[i]; } if(!dfself->isNativeEncoding) THDiskFile_reverseMemory(buffer, buffer, 8, n); nwrite = fwrite(buffer, 8, n, dfself->handle); THFree(buffer); } } else { size_t i; for(i = 0; i < n; i++) { int ret = fprintf(dfself->handle, "%ld", data[i]); if(ret <= 0) break; else nwrite++; if( dfself->file.isAutoSpacing && (i < n-1) ) fprintf(dfself->handle, " "); } if(dfself->file.isAutoSpacing && (n > 0)) fprintf(dfself->handle, "\n"); } if(nwrite != n) { dfself->file.hasError = 1; if(!dfself->file.isQuiet) THError("write error: wrote %d blocks instead of %d", nwrite, n); } return nwrite; } static size_t THDiskFile_readString(THFile *self, const char *format, char **str_) { THDiskFile *dfself = (THDiskFile*)(self); THArgCheck(dfself->handle != NULL, 1, 
"attempt to use a closed file"); THArgCheck(dfself->file.isReadable, 1, "attempt to read in a write-only file"); THArgCheck((strlen(format) >= 2 ? (format[0] == '*') && (format[1] == 'a' || format[1] == 'l') : 0), 2, "format must be '*a' or '*l'"); /* note: the string won't survive long, as it is copied into lua */ /* so 1024 is not that big... */ #define TBRS_BSZ 1024L if(format[1] == 'a') { char *p = THAlloc(TBRS_BSZ); size_t total = TBRS_BSZ; size_t pos = 0; for (;;) { if(total-pos == 0) /* we need more space! */ { total += TBRS_BSZ; p = THRealloc(p, total); } pos += fread(p+pos, 1, total-pos, dfself->handle); if (pos < total) /* eof? */ { if(pos == 0) { THFree(p); dfself->file.hasError = 1; if(!dfself->file.isQuiet) THError("read error: read 0 blocks instead of 1"); *str_ = NULL; return 0; } *str_ = p; return pos; } } } else { char *p = THAlloc(TBRS_BSZ); size_t total = TBRS_BSZ; size_t pos = 0; size_t size; for (;;) { if(total-pos <= 1) /* we can only write '\0' in there! */ { total += TBRS_BSZ; p = THRealloc(p, total); } if (fgets(p+pos, total-pos, dfself->handle) == NULL) /* eof? 
*/ { if(pos == 0) { THFree(p); dfself->file.hasError = 1; if(!dfself->file.isQuiet) THError("read error: read 0 blocks instead of 1"); *str_ = NULL; return 0; } *str_ = p; return pos; } size = strlen(p+pos); if (size == 0 || (p+pos)[size-1] != '\n') { pos += size; } else { pos += size-1; /* do not include `eol' */ *str_ = p; return pos; } } } *str_ = NULL; return 0; } static size_t THDiskFile_writeString(THFile *self, const char *str, size_t size) { THDiskFile *dfself = (THDiskFile*)(self); size_t nwrite; THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); THArgCheck(dfself->file.isWritable, 1, "attempt to write in a read-only file"); nwrite = fwrite(str, 1, size, dfself->handle); if(nwrite != size) { dfself->file.hasError = 1; if(!dfself->file.isQuiet) THError("write error: wrote %zu blocks instead of %zu", nwrite, size); } return nwrite; } THFile *THDiskFile_new(const char *name, const char *mode, int isQuiet) { static struct THFileVTable vtable = { THDiskFile_isOpened, THDiskFile_readByte, THDiskFile_readChar, THDiskFile_readShort, THDiskFile_readInt, THDiskFile_readLong, THDiskFile_readFloat, THDiskFile_readDouble, THDiskFile_readHalf, THDiskFile_readString, THDiskFile_writeByte, THDiskFile_writeChar, THDiskFile_writeShort, THDiskFile_writeInt, THDiskFile_writeLong, THDiskFile_writeFloat, THDiskFile_writeDouble, THDiskFile_writeHalf, THDiskFile_writeString, THDiskFile_synchronize, THDiskFile_seek, THDiskFile_seekEnd, THDiskFile_position, THDiskFile_close, THDiskFile_free }; int isReadable; int isWritable; FILE *handle; THDiskFile *self; THArgCheck(THDiskFile_mode(mode, &isReadable, &isWritable), 2, "file mode should be 'r','w' or 'rw'"); if( isReadable && isWritable ) { handle = fopen(name, "r+b"); if(!handle) { handle = fopen(name, "wb"); if(handle) { fclose(handle); handle = fopen(name, "r+b"); } } } else handle = fopen(name, (isReadable ? 
"rb" : "wb")); if(!handle) { if(isQuiet) return 0; else THError("cannot open <%s> in mode %c%c", name, (isReadable ? 'r' : ' '), (isWritable ? 'w' : ' ')); } self = THAlloc(sizeof(THDiskFile)); self->handle = handle; self->name = THAlloc(strlen(name)+1); strcpy(self->name, name); self->isNativeEncoding = 1; self->longSize = 0; self->file.vtable = &vtable; self->file.isQuiet = isQuiet; self->file.isReadable = isReadable; self->file.isWritable = isWritable; self->file.isBinary = 0; self->file.isAutoSpacing = 1; self->file.hasError = 0; return (THFile*)self; } /* PipeFile */ static int THPipeFile_mode(const char *mode, int *isReadable, int *isWritable) { *isReadable = 0; *isWritable = 0; if(strlen(mode) == 1) { if(*mode == 'r') { *isReadable = 1; return 1; } else if(*mode == 'w') { *isWritable = 1; return 1; } } return 0; } static void THPipeFile_free(THFile *self) { THDiskFile *dfself = (THDiskFile*)(self); if(dfself->handle) pclose(dfself->handle); THFree(dfself->name); THFree(dfself); } THFile *THPipeFile_new(const char *name, const char *mode, int isQuiet) { static struct THFileVTable vtable = { THDiskFile_isOpened, THDiskFile_readByte, THDiskFile_readChar, THDiskFile_readShort, THDiskFile_readInt, THDiskFile_readLong, THDiskFile_readFloat, THDiskFile_readDouble, THDiskFile_readHalf, THDiskFile_readString, THDiskFile_writeByte, THDiskFile_writeChar, THDiskFile_writeShort, THDiskFile_writeInt, THDiskFile_writeLong, THDiskFile_writeFloat, THDiskFile_writeDouble, THDiskFile_writeHalf, THDiskFile_writeString, THDiskFile_synchronize, THDiskFile_seek, THDiskFile_seekEnd, THDiskFile_position, THDiskFile_close, THPipeFile_free }; int isReadable; int isWritable; FILE *handle; THDiskFile *self; THArgCheck(THPipeFile_mode(mode, &isReadable, &isWritable), 2, "file mode should be 'r','w'"); #ifdef _WIN32 handle = _popen(name, (isReadable ? "rb" : "wb")); #else handle = popen(name, (isReadable ? 
"r" : "w")); #endif if(!handle) { if(isQuiet) return 0; else THError("cannot open <%s> in mode %c%c. This might be because eg the executable doesn't exist, but it could also be because you are out of memory.", name, (isReadable ? 'r' : ' '), (isWritable ? 'w' : ' ')); } self = THAlloc(sizeof(THDiskFile)); self->handle = handle; self->name = THAlloc(strlen(name)+1); strcpy(self->name, name); self->isNativeEncoding = 1; self->longSize = 0; self->file.vtable = &vtable; self->file.isQuiet = isQuiet; self->file.isReadable = isReadable; self->file.isWritable = isWritable; self->file.isBinary = 0; self->file.isAutoSpacing = 1; self->file.hasError = 0; return (THFile*)self; } lib/TH/THDiskFile.h000066400000000000000000000012251316246254300142440ustar00rootroot00000000000000#ifndef TH_DISK_FILE_INC #define TH_DISK_FILE_INC #include "THFile.h" TH_API THFile *THDiskFile_new(const char *name, const char *mode, int isQuiet); TH_API THFile *THPipeFile_new(const char *name, const char *mode, int isQuiet); TH_API const char *THDiskFile_name(THFile *self); TH_API int THDiskFile_isLittleEndianCPU(void); TH_API int THDiskFile_isBigEndianCPU(void); TH_API void THDiskFile_nativeEndianEncoding(THFile *self); TH_API void THDiskFile_littleEndianEncoding(THFile *self); TH_API void THDiskFile_bigEndianEncoding(THFile *self); TH_API void THDiskFile_longSize(THFile *self, int size); TH_API void THDiskFile_noBuffer(THFile *self); #endif lib/TH/THFile.c000066400000000000000000000107151316246254300134300ustar00rootroot00000000000000#include "THFile.h" #include "THFilePrivate.h" #define IMPLEMENT_THFILE_RW(TYPEC, TYPE) \ size_t THFile_read##TYPEC##Raw(THFile *self, TYPE *data, size_t n) \ { \ return (*self->vtable->read##TYPEC)(self, data, n); \ } \ \ size_t THFile_write##TYPEC##Raw(THFile *self, TYPE *data, size_t n) \ { \ return (*self->vtable->write##TYPEC)(self, data, n); \ } IMPLEMENT_THFILE_RW(Byte, unsigned char) IMPLEMENT_THFILE_RW(Char, char) IMPLEMENT_THFILE_RW(Short, short) 
IMPLEMENT_THFILE_RW(Int, int) IMPLEMENT_THFILE_RW(Long, long) IMPLEMENT_THFILE_RW(Float, float) IMPLEMENT_THFILE_RW(Double, double) IMPLEMENT_THFILE_RW(Half, THHalf) size_t THFile_readStringRaw(THFile *self, const char *format, char **str_) { return self->vtable->readString(self, format, str_); } size_t THFile_writeStringRaw(THFile *self, const char *str, size_t size) { return self->vtable->writeString(self, str, size); } void THFile_synchronize(THFile *self) { self->vtable->synchronize(self); } void THFile_seek(THFile *self, size_t position) { self->vtable->seek(self, position); } void THFile_seekEnd(THFile *self) { self->vtable->seekEnd(self); } size_t THFile_position(THFile *self) { return self->vtable->position(self); } void THFile_close(THFile *self) { self->vtable->close(self); } void THFile_free(THFile *self) { self->vtable->free(self); } int THFile_isOpened(THFile *self) { return self->vtable->isOpened(self); } #define IMPLEMENT_THFILE_FLAGS(FLAG) \ int THFile_##FLAG(THFile *self) \ { \ return self->FLAG; \ } IMPLEMENT_THFILE_FLAGS(isQuiet) IMPLEMENT_THFILE_FLAGS(isReadable) IMPLEMENT_THFILE_FLAGS(isWritable) IMPLEMENT_THFILE_FLAGS(isBinary) IMPLEMENT_THFILE_FLAGS(isAutoSpacing) IMPLEMENT_THFILE_FLAGS(hasError) void THFile_binary(THFile *self) { self->isBinary = 1; } void THFile_ascii(THFile *self) { self->isBinary = 0; } void THFile_autoSpacing(THFile *self) { self->isAutoSpacing = 1; } void THFile_noAutoSpacing(THFile *self) { self->isAutoSpacing = 0; } void THFile_quiet(THFile *self) { self->isQuiet = 1; } void THFile_pedantic(THFile *self) { self->isQuiet = 0; } void THFile_clearError(THFile *self) { self->hasError = 0; } #define IMPLEMENT_THFILE_SCALAR(TYPEC, TYPE) \ TYPE THFile_read##TYPEC##Scalar(THFile *self) \ { \ TYPE scalar; \ THFile_read##TYPEC##Raw(self, &scalar, 1); \ return scalar; \ } \ \ void THFile_write##TYPEC##Scalar(THFile *self, TYPE scalar) \ { \ THFile_write##TYPEC##Raw(self, &scalar, 1); \ } IMPLEMENT_THFILE_SCALAR(Byte, unsigned 
char) IMPLEMENT_THFILE_SCALAR(Char, char) IMPLEMENT_THFILE_SCALAR(Short, short) IMPLEMENT_THFILE_SCALAR(Int, int) IMPLEMENT_THFILE_SCALAR(Long, long) IMPLEMENT_THFILE_SCALAR(Float, float) IMPLEMENT_THFILE_SCALAR(Double, double) IMPLEMENT_THFILE_SCALAR(Half, THHalf) #define IMPLEMENT_THFILE_STORAGE(TYPEC, TYPE) \ size_t THFile_read##TYPEC(THFile *self, TH##TYPEC##Storage *storage) \ { \ return THFile_read##TYPEC##Raw(self, storage->data, storage->size); \ } \ \ size_t THFile_write##TYPEC(THFile *self, TH##TYPEC##Storage *storage) \ { \ return THFile_write##TYPEC##Raw(self, storage->data, storage->size); \ } IMPLEMENT_THFILE_STORAGE(Byte, unsigned char) IMPLEMENT_THFILE_STORAGE(Char, char) IMPLEMENT_THFILE_STORAGE(Short, short) IMPLEMENT_THFILE_STORAGE(Int, int) IMPLEMENT_THFILE_STORAGE(Long, long) IMPLEMENT_THFILE_STORAGE(Float, float) IMPLEMENT_THFILE_STORAGE(Double, double) IMPLEMENT_THFILE_STORAGE(Half, THHalf) lib/TH/THFile.h000066400000000000000000000105321316246254300134320ustar00rootroot00000000000000#ifndef TH_FILE_INC #define TH_FILE_INC #include "THStorage.h" typedef struct THFile__ THFile; TH_API int THFile_isOpened(THFile *self); TH_API int THFile_isQuiet(THFile *self); TH_API int THFile_isReadable(THFile *self); TH_API int THFile_isWritable(THFile *self); TH_API int THFile_isBinary(THFile *self); TH_API int THFile_isAutoSpacing(THFile *self); TH_API int THFile_hasError(THFile *self); TH_API void THFile_binary(THFile *self); TH_API void THFile_ascii(THFile *self); TH_API void THFile_autoSpacing(THFile *self); TH_API void THFile_noAutoSpacing(THFile *self); TH_API void THFile_quiet(THFile *self); TH_API void THFile_pedantic(THFile *self); TH_API void THFile_clearError(THFile *self); /* scalar */ TH_API unsigned char THFile_readByteScalar(THFile *self); TH_API char THFile_readCharScalar(THFile *self); TH_API short THFile_readShortScalar(THFile *self); TH_API int THFile_readIntScalar(THFile *self); TH_API long THFile_readLongScalar(THFile *self); TH_API 
float THFile_readFloatScalar(THFile *self); TH_API double THFile_readDoubleScalar(THFile *self); TH_API void THFile_writeByteScalar(THFile *self, unsigned char scalar); TH_API void THFile_writeCharScalar(THFile *self, char scalar); TH_API void THFile_writeShortScalar(THFile *self, short scalar); TH_API void THFile_writeIntScalar(THFile *self, int scalar); TH_API void THFile_writeLongScalar(THFile *self, long scalar); TH_API void THFile_writeFloatScalar(THFile *self, float scalar); TH_API void THFile_writeDoubleScalar(THFile *self, double scalar); /* storage */ TH_API size_t THFile_readByte(THFile *self, THByteStorage *storage); TH_API size_t THFile_readChar(THFile *self, THCharStorage *storage); TH_API size_t THFile_readShort(THFile *self, THShortStorage *storage); TH_API size_t THFile_readInt(THFile *self, THIntStorage *storage); TH_API size_t THFile_readLong(THFile *self, THLongStorage *storage); TH_API size_t THFile_readFloat(THFile *self, THFloatStorage *storage); TH_API size_t THFile_readDouble(THFile *self, THDoubleStorage *storage); TH_API size_t THFile_writeByte(THFile *self, THByteStorage *storage); TH_API size_t THFile_writeChar(THFile *self, THCharStorage *storage); TH_API size_t THFile_writeShort(THFile *self, THShortStorage *storage); TH_API size_t THFile_writeInt(THFile *self, THIntStorage *storage); TH_API size_t THFile_writeLong(THFile *self, THLongStorage *storage); TH_API size_t THFile_writeFloat(THFile *self, THFloatStorage *storage); TH_API size_t THFile_writeDouble(THFile *self, THDoubleStorage *storage); /* raw */ TH_API size_t THFile_readByteRaw(THFile *self, unsigned char *data, size_t n); TH_API size_t THFile_readCharRaw(THFile *self, char *data, size_t n); TH_API size_t THFile_readShortRaw(THFile *self, short *data, size_t n); TH_API size_t THFile_readIntRaw(THFile *self, int *data, size_t n); TH_API size_t THFile_readLongRaw(THFile *self, long *data, size_t n); TH_API size_t THFile_readFloatRaw(THFile *self, float *data, size_t n); TH_API 
size_t THFile_readDoubleRaw(THFile *self, double *data, size_t n); TH_API size_t THFile_readStringRaw(THFile *self, const char *format, char **str_); /* you must deallocate str_ */ TH_API size_t THFile_writeByteRaw(THFile *self, unsigned char *data, size_t n); TH_API size_t THFile_writeCharRaw(THFile *self, char *data, size_t n); TH_API size_t THFile_writeShortRaw(THFile *self, short *data, size_t n); TH_API size_t THFile_writeIntRaw(THFile *self, int *data, size_t n); TH_API size_t THFile_writeLongRaw(THFile *self, long *data, size_t n); TH_API size_t THFile_writeFloatRaw(THFile *self, float *data, size_t n); TH_API size_t THFile_writeDoubleRaw(THFile *self, double *data, size_t n); TH_API size_t THFile_writeStringRaw(THFile *self, const char *str, size_t size); TH_API THHalf THFile_readHalfScalar(THFile *self); TH_API void THFile_writeHalfScalar(THFile *self, THHalf scalar); TH_API size_t THFile_readHalf(THFile *self, THHalfStorage *storage); TH_API size_t THFile_writeHalf(THFile *self, THHalfStorage *storage); TH_API size_t THFile_readHalfRaw(THFile *self, THHalf* data, size_t size); TH_API size_t THFile_writeHalfRaw(THFile *self, THHalf* data, size_t size); TH_API void THFile_synchronize(THFile *self); TH_API void THFile_seek(THFile *self, size_t position); TH_API void THFile_seekEnd(THFile *self); TH_API size_t THFile_position(THFile *self); TH_API void THFile_close(THFile *self); TH_API void THFile_free(THFile *self); #endif lib/TH/THFilePrivate.h000066400000000000000000000032251316246254300147660ustar00rootroot00000000000000#include "THGeneral.h" #include "THHalf.h" struct THFile__ { struct THFileVTable *vtable; int isQuiet; int isReadable; int isWritable; int isBinary; int isAutoSpacing; int hasError; }; /* virtual table definition */ struct THFileVTable { int (*isOpened)(THFile *self); size_t (*readByte)(THFile *self, unsigned char *data, size_t n); size_t (*readChar)(THFile *self, char *data, size_t n); size_t (*readShort)(THFile *self, short *data, 
size_t n); size_t (*readInt)(THFile *self, int *data, size_t n); size_t (*readLong)(THFile *self, long *data, size_t n); size_t (*readFloat)(THFile *self, float *data, size_t n); size_t (*readDouble)(THFile *self, double *data, size_t n); size_t (*readHalf)(THFile *self, THHalf *data, size_t n); size_t (*readString)(THFile *self, const char *format, char **str_); size_t (*writeByte)(THFile *self, unsigned char *data, size_t n); size_t (*writeChar)(THFile *self, char *data, size_t n); size_t (*writeShort)(THFile *self, short *data, size_t n); size_t (*writeInt)(THFile *self, int *data, size_t n); size_t (*writeLong)(THFile *self, long *data, size_t n); size_t (*writeFloat)(THFile *self, float *data, size_t n); size_t (*writeDouble)(THFile *self, double *data, size_t n); size_t (*writeHalf)(THFile *self, THHalf *data, size_t n); size_t (*writeString)(THFile *self, const char *str, size_t size); void (*synchronize)(THFile *self); void (*seek)(THFile *self, size_t position); void (*seekEnd)(THFile *self); size_t (*position)(THFile *self); void (*close)(THFile *self); void (*free)(THFile *self); }; lib/TH/THGeneral.c000066400000000000000000000232171316246254300141270ustar00rootroot00000000000000#include "THGeneral.h" #include "THAtomic.h" #ifdef _OPENMP #include #endif #ifndef TH_HAVE_THREAD #define __thread #elif _MSC_VER #define __thread __declspec( thread ) #endif #if (defined(__unix) || defined(_WIN32)) #if defined(__FreeBSD__) #include #else #include #endif #elif defined(__APPLE__) #include #endif /* Torch Error Handling */ static void defaultErrorHandlerFunction(const char *msg, void *data) { printf("$ Error: %s\n", msg); exit(-1); } static THErrorHandlerFunction defaultErrorHandler = defaultErrorHandlerFunction; static void *defaultErrorHandlerData; static __thread THErrorHandlerFunction threadErrorHandler = NULL; static __thread void *threadErrorHandlerData; void _THError(const char *file, const int line, const char *fmt, ...) 
{ char msg[2048]; va_list args; /* vasprintf not standard */ /* vsnprintf: how to handle if does not exists? */ va_start(args, fmt); int n = vsnprintf(msg, 2048, fmt, args); va_end(args); if(n < 2048) { snprintf(msg + n, 2048 - n, " at %s:%d", file, line); } if (threadErrorHandler) (*threadErrorHandler)(msg, threadErrorHandlerData); else (*defaultErrorHandler)(msg, defaultErrorHandlerData); } void _THAssertionFailed(const char *file, const int line, const char *exp, const char *fmt, ...) { char msg[1024]; va_list args; va_start(args, fmt); vsnprintf(msg, 1024, fmt, args); va_end(args); _THError(file, line, "Assertion `%s' failed. %s", exp, msg); } void THSetErrorHandler(THErrorHandlerFunction new_handler, void *data) { threadErrorHandler = new_handler; threadErrorHandlerData = data; } void THSetDefaultErrorHandler(THErrorHandlerFunction new_handler, void *data) { if (new_handler) defaultErrorHandler = new_handler; else defaultErrorHandler = defaultErrorHandlerFunction; defaultErrorHandlerData = data; } /* Torch Arg Checking Handling */ static void defaultArgErrorHandlerFunction(int argNumber, const char *msg, void *data) { if(msg) printf("$ Invalid argument %d: %s\n", argNumber, msg); else printf("$ Invalid argument %d\n", argNumber); exit(-1); } static THArgErrorHandlerFunction defaultArgErrorHandler = defaultArgErrorHandlerFunction; static void *defaultArgErrorHandlerData; static __thread THArgErrorHandlerFunction threadArgErrorHandler = NULL; static __thread void *threadArgErrorHandlerData; void _THArgCheck(const char *file, int line, int condition, int argNumber, const char *fmt, ...) { if(!condition) { char msg[2048]; va_list args; /* vasprintf not standard */ /* vsnprintf: how to handle if does not exists? 
*/ va_start(args, fmt); int n = vsnprintf(msg, 2048, fmt, args); va_end(args); if(n < 2048) { snprintf(msg + n, 2048 - n, " at %s:%d", file, line); } if (threadArgErrorHandler) (*threadArgErrorHandler)(argNumber, msg, threadArgErrorHandlerData); else (*defaultArgErrorHandler)(argNumber, msg, defaultArgErrorHandlerData); } } void THSetArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data) { threadArgErrorHandler = new_handler; threadArgErrorHandlerData = data; } void THSetDefaultArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data) { if (new_handler) defaultArgErrorHandler = new_handler; else defaultArgErrorHandler = defaultArgErrorHandlerFunction; defaultArgErrorHandlerData = data; } static __thread void (*torchGCFunction)(void *data) = NULL; static __thread void *torchGCData; static ptrdiff_t heapSize = 0; static __thread ptrdiff_t heapDelta = 0; static const ptrdiff_t heapMaxDelta = (ptrdiff_t)1e6; // limit to +/- 1MB before updating heapSize static const ptrdiff_t heapMinDelta = (ptrdiff_t)-1e6; static __thread ptrdiff_t heapSoftmax = (ptrdiff_t)3e8; // 300MB, adjusted upward dynamically static const double heapSoftmaxGrowthThresh = 0.8; // grow softmax if >80% max after GC static const double heapSoftmaxGrowthFactor = 1.4; // grow softmax by 40% /* Optional hook for integrating with a garbage-collected frontend. * * If torch is running with a garbage-collected frontend (e.g. Lua), * the GC isn't aware of TH-allocated memory so may not know when it * needs to run. These hooks trigger the GC to run in two cases: * * (1) When a memory allocation (malloc, realloc, ...) fails * (2) When the total TH-allocated memory hits a dynamically-adjusted * soft maximum. 
*/ void THSetGCHandler( void (*torchGCFunction_)(void *data), void *data ) { torchGCFunction = torchGCFunction_; torchGCData = data; } /* it is guaranteed the allocated size is not bigger than PTRDIFF_MAX */ static ptrdiff_t getAllocSize(void *ptr) { #if defined(__unix) && defined(HAVE_MALLOC_USABLE_SIZE) return malloc_usable_size(ptr); #elif defined(__APPLE__) return malloc_size(ptr); #elif defined(_WIN32) if(ptr) { return _msize(ptr); } else { return 0; } #else return 0; #endif } static ptrdiff_t applyHeapDelta() { ptrdiff_t oldHeapSize = THAtomicAddPtrdiff(&heapSize, heapDelta); #ifdef DEBUG if (heapDelta > 0 && oldHeapSize > PTRDIFF_MAX - heapDelta) THError("applyHeapDelta: heapSize(%td) + increased(%td) > PTRDIFF_MAX, heapSize overflow!", oldHeapSize, heapDelta); if (heapDelta < 0 && oldHeapSize < PTRDIFF_MIN - heapDelta) THError("applyHeapDelta: heapSize(%td) + decreased(%td) < PTRDIFF_MIN, heapSize underflow!", oldHeapSize, heapDelta); #endif ptrdiff_t newHeapSize = oldHeapSize + heapDelta; heapDelta = 0; return newHeapSize; } /* (1) if the torch-allocated heap size exceeds the soft max, run GC * (2) if post-GC heap size exceeds 80% of the soft max, increase the * soft max by 40% */ static void maybeTriggerGC(ptrdiff_t curHeapSize) { if (torchGCFunction && curHeapSize > heapSoftmax) { torchGCFunction(torchGCData); // ensure heapSize is accurate before updating heapSoftmax ptrdiff_t newHeapSize = applyHeapDelta(); if (newHeapSize > heapSoftmax * heapSoftmaxGrowthThresh) { heapSoftmax = (ptrdiff_t)(heapSoftmax * heapSoftmaxGrowthFactor); } } } // hooks into the TH heap tracking void THHeapUpdate(ptrdiff_t size) { #ifdef DEBUG if (size > 0 && heapDelta > PTRDIFF_MAX - size) THError("THHeapUpdate: heapDelta(%td) + increased(%td) > PTRDIFF_MAX, heapDelta overflow!", heapDelta, size); if (size < 0 && heapDelta < PTRDIFF_MIN - size) THError("THHeapUpdate: heapDelta(%td) + decreased(%td) < PTRDIFF_MIN, heapDelta underflow!", heapDelta, size); #endif heapDelta += 
size; // batch updates to global heapSize to minimize thread contention if (heapDelta < heapMaxDelta && heapDelta > heapMinDelta) { return; } ptrdiff_t newHeapSize = applyHeapDelta(); if (size > 0) { maybeTriggerGC(newHeapSize); } } static void* THAllocInternal(ptrdiff_t size) { void *ptr; if (size > 5120) { #if (defined(__unix) || defined(__APPLE__)) && (!defined(DISABLE_POSIX_MEMALIGN)) if (posix_memalign(&ptr, 64, size) != 0) ptr = NULL; /* #elif defined(_WIN32) ptr = _aligned_malloc(size, 64); */ #else ptr = malloc(size); #endif } else { ptr = malloc(size); } THHeapUpdate(getAllocSize(ptr)); return ptr; } void* THAlloc(ptrdiff_t size) { void *ptr; if(size < 0) THError("$ Torch: invalid memory size -- maybe an overflow?"); if(size == 0) return NULL; ptr = THAllocInternal(size); if(!ptr && torchGCFunction) { torchGCFunction(torchGCData); ptr = THAllocInternal(size); } if(!ptr) THError("$ Torch: not enough memory: you tried to allocate %dGB. Buy new RAM!", size/1073741824); return ptr; } void* THRealloc(void *ptr, ptrdiff_t size) { if(!ptr) return(THAlloc(size)); if(size == 0) { THFree(ptr); return NULL; } if(size < 0) THError("$ Torch: invalid memory size -- maybe an overflow?"); ptrdiff_t oldSize = -getAllocSize(ptr); void *newptr = realloc(ptr, size); if(!newptr && torchGCFunction) { torchGCFunction(torchGCData); newptr = realloc(ptr, size); } if(!newptr) THError("$ Torch: not enough memory: you tried to reallocate %dGB. 
Buy new RAM!", size/1073741824); // update heapSize only after successfully reallocated THHeapUpdate(oldSize + getAllocSize(newptr)); return newptr; } void THFree(void *ptr) { THHeapUpdate(-getAllocSize(ptr)); free(ptr); } double THLog1p(const double x) { #if (defined(_MSC_VER) || defined(__MINGW32__)) volatile double y = 1 + x; return log(y) - ((y-1)-x)/y ; /* cancels errors with IEEE arithmetic */ #else return log1p(x); #endif } void THSetNumThreads(int num_threads) { #ifdef _OPENMP omp_set_num_threads(num_threads); #endif } int THGetNumThreads(void) { #ifdef _OPENMP return omp_get_max_threads(); #else return 1; #endif } int THGetNumCores(void) { #ifdef _OPENMP return omp_get_num_procs(); #else return 1; #endif } #ifdef TH_BLAS_MKL extern int mkl_get_max_threads(void); #endif TH_API void THInferNumThreads(void) { #if defined(_OPENMP) && defined(TH_BLAS_MKL) // If we are using MKL an OpenMP make sure the number of threads match. // Otherwise, MKL and our OpenMP-enabled functions will keep changing the // size of the OpenMP thread pool, resulting in worse performance (and memory // leaks in GCC 5.4) omp_set_num_threads(mkl_get_max_threads()); #endif } TH_API THDescBuff _THSizeDesc(const long *size, const long ndim) { const int L = TH_DESC_BUFF_LEN; THDescBuff buf; char *str = buf.str; int n = 0; n += snprintf(str, L-n, "["); int i; for(i = 0; i < ndim; i++) { if(n >= L) break; n += snprintf(str+n, L-n, "%ld", size[i]); if(i < ndim-1) { n += snprintf(str+n, L-n, " x "); } } if(n < L - 2) { snprintf(str+n, L-n, "]"); } else { snprintf(str+L-5, 5, "...]"); } return buf; } lib/TH/THGeneral.h.in000066400000000000000000000106111316246254300145330ustar00rootroot00000000000000#ifndef TH_GENERAL_INC #define TH_GENERAL_INC #include #include #include #include #include #include #include #include #include #cmakedefine USE_BLAS #cmakedefine USE_LAPACK #cmakedefine BLAS_F2C #cmakedefine BLAS_USE_CBLAS_DOT #ifdef __cplusplus # define TH_EXTERNC extern "C" #else # define TH_EXTERNC 
extern #endif #ifdef _WIN32 # ifdef TH_EXPORTS # define TH_API TH_EXTERNC __declspec(dllexport) # else # define TH_API TH_EXTERNC __declspec(dllimport) # endif #else # define TH_API TH_EXTERNC #endif #ifndef M_PI # define M_PI 3.14159265358979323846 #endif #ifndef TH_INDEX_BASE #define TH_INDEX_BASE 1 #endif typedef void (*THErrorHandlerFunction)(const char *msg, void *data); typedef void (*THArgErrorHandlerFunction)(int argNumber, const char *msg, void *data); #define TH_DESC_BUFF_LEN 64 typedef struct { char str[TH_DESC_BUFF_LEN]; } THDescBuff; TH_API double THLog1p(const double x); TH_API THDescBuff _THSizeDesc(const long *size, const long ndim); TH_API void _THError(const char *file, const int line, const char *fmt, ...); TH_API void _THAssertionFailed(const char *file, const int line, const char *exp, const char *fmt, ...); TH_API void THSetErrorHandler(THErrorHandlerFunction new_handler, void *data); TH_API void THSetDefaultErrorHandler(THErrorHandlerFunction new_handler, void *data); TH_API void _THArgCheck(const char *file, int line, int condition, int argNumber, const char *fmt, ...); TH_API void THSetArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data); TH_API void THSetDefaultArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data); TH_API void* THAlloc(ptrdiff_t size); TH_API void* THRealloc(void *ptr, ptrdiff_t size); TH_API void THFree(void *ptr); TH_API void THSetGCHandler( void (*torchGCHandlerFunction)(void *data), void *data ); // this hook should only be called by custom allocator functions TH_API void THHeapUpdate(ptrdiff_t size); TH_API void THSetNumThreads(int num_threads); TH_API int THGetNumThreads(void); TH_API int THGetNumCores(void); TH_API void THInferNumThreads(void); #define THError(...) _THError(__FILE__, __LINE__, __VA_ARGS__) #define THCleanup(...) __VA_ARGS__ #define THArgCheck(...) \ do { \ _THArgCheck(__FILE__, __LINE__, __VA_ARGS__); \ } while(0) #define THArgCheckWithCleanup(condition, cleanup, ...) 
\ do if (!(condition)) { \ cleanup \ _THArgCheck(__FILE__, __LINE__, 0, __VA_ARGS__); \ } while(0) #define THAssert(exp) \ do { \ if (!(exp)) { \ _THAssertionFailed(__FILE__, __LINE__, #exp, ""); \ } \ } while(0) #define THAssertMsg(exp, ...) \ do { \ if (!(exp)) { \ _THAssertionFailed(__FILE__, __LINE__, #exp, __VA_ARGS__); \ } \ } while(0) #define TH_CONCAT_STRING_2(x,y) TH_CONCAT_STRING_2_EXPAND(x,y) #define TH_CONCAT_STRING_2_EXPAND(x,y) #x #y #define TH_CONCAT_STRING_3(x,y,z) TH_CONCAT_STRING_3_EXPAND(x,y,z) #define TH_CONCAT_STRING_3_EXPAND(x,y,z) #x #y #z #define TH_CONCAT_STRING_4(x,y,z,w) TH_CONCAT_STRING_4_EXPAND(x,y,z,w) #define TH_CONCAT_STRING_4_EXPAND(x,y,z,w) #x #y #z #w #define TH_CONCAT_2(x,y) TH_CONCAT_2_EXPAND(x,y) #define TH_CONCAT_2_EXPAND(x,y) x ## y #define TH_CONCAT_3(x,y,z) TH_CONCAT_3_EXPAND(x,y,z) #define TH_CONCAT_3_EXPAND(x,y,z) x ## y ## z #define TH_CONCAT_4_EXPAND(x,y,z,w) x ## y ## z ## w #define TH_CONCAT_4(x,y,z,w) TH_CONCAT_4_EXPAND(x,y,z,w) #define THMin(X, Y) ((X) < (Y) ? (X) : (Y)) #define THMax(X, Y) ((X) > (Y) ? 
(X) : (Y)) #if (defined(_MSC_VER) || defined(__MINGW32__)) # define log1p(x) THLog1p(x) #define snprintf _snprintf #define popen _popen #define pclose _pclose #include typedef SSIZE_T ssize_t; #endif #endif lib/TH/THGenerateAllTypes.h000066400000000000000000000006251316246254300157650ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #error "You must define TH_GENERIC_FILE before including THGenerateAllTypes.h" #endif #ifndef THGenerateManyTypes #define THAllLocalGenerateManyTypes #define THGenerateManyTypes #endif #include "THGenerateFloatTypes.h" #include "THGenerateIntTypes.h" #ifdef THAllLocalGenerateManyTypes #undef THAllLocalGenerateManyTypes #undef THGenerateManyTypes #undef TH_GENERIC_FILE #endif lib/TH/THGenerateByteType.h000066400000000000000000000011131316246254300157660ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #error "You must define TH_GENERIC_FILE before including THGenerateByteType.h" #endif #define real unsigned char #define accreal long #define Real Byte #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) #define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val) #define THInf UCHAR_MAX #define TH_REAL_IS_BYTE #line 1 TH_GENERIC_FILE #include TH_GENERIC_FILE #undef real #undef accreal #undef Real #undef THInf #undef TH_REAL_IS_BYTE #undef TH_CONVERT_REAL_TO_ACCREAL #undef TH_CONVERT_ACCREAL_TO_REAL #ifndef THGenerateManyTypes #undef TH_GENERIC_FILE #endif lib/TH/THGenerateCharType.h000066400000000000000000000011011316246254300157350ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #error "You must define TH_GENERIC_FILE before including THGenerateCharType.h" #endif #define real char #define accreal long #define Real Char #define THInf CHAR_MAX #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) #define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val) #define TH_REAL_IS_CHAR #line 1 TH_GENERIC_FILE #include TH_GENERIC_FILE #undef real #undef accreal #undef Real #undef THInf #undef TH_REAL_IS_CHAR #undef TH_CONVERT_REAL_TO_ACCREAL 
#undef TH_CONVERT_ACCREAL_TO_REAL #ifndef THGenerateManyTypes #undef TH_GENERIC_FILE #endif lib/TH/THGenerateDoubleType.h000066400000000000000000000011141316246254300162760ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #error "You must define TH_GENERIC_FILE before including THGenerateDoubleType.h" #endif #define real double #define accreal double #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) #define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val) #define Real Double #define THInf DBL_MAX #define TH_REAL_IS_DOUBLE #line 1 TH_GENERIC_FILE #include TH_GENERIC_FILE #undef accreal #undef real #undef Real #undef THInf #undef TH_REAL_IS_DOUBLE #undef TH_CONVERT_REAL_TO_ACCREAL #undef TH_CONVERT_ACCREAL_TO_REAL #ifndef THGenerateManyTypes #undef TH_GENERIC_FILE #endif lib/TH/THGenerateFloatType.h000066400000000000000000000011071316246254300161330ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #error "You must define TH_GENERIC_FILE before including THGenerateFloatType.h" #endif #define real float #define accreal double #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) #define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val) #define Real Float #define THInf FLT_MAX #define TH_REAL_IS_FLOAT #line 1 TH_GENERIC_FILE #include TH_GENERIC_FILE #undef accreal #undef real #undef Real #undef THInf #undef TH_REAL_IS_FLOAT #undef TH_CONVERT_REAL_TO_ACCREAL #undef TH_CONVERT_ACCREAL_TO_REAL #ifndef THGenerateManyTypes #undef TH_GENERIC_FILE #endif lib/TH/THGenerateFloatTypes.h000066400000000000000000000006361316246254300163240ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #error "You must define TH_GENERIC_FILE before including THGenerateFloatTypes.h" #endif #ifndef THGenerateManyTypes #define THFloatLocalGenerateManyTypes #define THGenerateManyTypes #endif #include "THGenerateFloatType.h" #include "THGenerateDoubleType.h" #ifdef THFloatLocalGenerateManyTypes #undef THFloatLocalGenerateManyTypes #undef THGenerateManyTypes #undef TH_GENERIC_FILE #endif 
lib/TH/THGenerateHalfType.h000066400000000000000000000011771316246254300157470ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #error "You must define TH_GENERIC_FILE before including THGenerateHalfType.h" #endif #include "THHalf.h" #define real THHalf #define accreal float #define TH_CONVERT_REAL_TO_ACCREAL(_val) TH_half2float(_val) #define TH_CONVERT_ACCREAL_TO_REAL(_val) TH_float2half(_val) #define Real Half #define THInf TH_HALF_BITS_TO_LITERAL(TH_HALF_INF) #define TH_REAL_IS_HALF #line 1 TH_GENERIC_FILE #include TH_GENERIC_FILE #undef real #undef accreal #undef Real #undef THInf #undef TH_REAL_IS_HALF #undef TH_CONVERT_REAL_TO_ACCREAL #undef TH_CONVERT_ACCREAL_TO_REAL #ifndef THGenerateManyTypes #undef TH_GENERIC_FILE #endif lib/TH/THGenerateIntType.h000066400000000000000000000010731316246254300156220ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #error "You must define TH_GENERIC_FILE before including THGenerateIntType.h" #endif #define real int #define accreal long #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) #define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val) #define Real Int #define THInf INT_MAX #define TH_REAL_IS_INT #line 1 TH_GENERIC_FILE #include TH_GENERIC_FILE #undef real #undef accreal #undef Real #undef THInf #undef TH_REAL_IS_INT #undef TH_CONVERT_REAL_TO_ACCREAL #undef TH_CONVERT_ACCREAL_TO_REAL #ifndef THGenerateManyTypes #undef TH_GENERIC_FILE #endif lib/TH/THGenerateIntTypes.h000066400000000000000000000007631316246254300160120ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #error "You must define TH_GENERIC_FILE before including THGenerateIntTypes.h" #endif #ifndef THGenerateManyTypes #define THIntLocalGenerateManyTypes #define THGenerateManyTypes #endif #include "THGenerateByteType.h" #include "THGenerateCharType.h" #include "THGenerateShortType.h" #include "THGenerateIntType.h" #include "THGenerateLongType.h" #ifdef THIntLocalGenerateManyTypes #undef THIntLocalGenerateManyTypes #undef THGenerateManyTypes #undef 
TH_GENERIC_FILE #endif lib/TH/THGenerateLongType.h000066400000000000000000000011011316246254300157570ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #error "You must define TH_GENERIC_FILE before including THGenerateLongType.h" #endif #define real long #define accreal long #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) #define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val) #define Real Long #define THInf LONG_MAX #define TH_REAL_IS_LONG #line 1 TH_GENERIC_FILE #include TH_GENERIC_FILE #undef real #undef accreal #undef Real #undef THInf #undef TH_REAL_IS_LONG #undef TH_CONVERT_REAL_TO_ACCREAL #undef TH_CONVERT_ACCREAL_TO_REAL #ifndef THGenerateManyTypes #undef TH_GENERIC_FILE #endif lib/TH/THGenerateShortType.h000066400000000000000000000011061316246254300161640ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #error "You must define TH_GENERIC_FILE before including THGenerateShortType.h" #endif #define real short #define accreal long #define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) #define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val) #define Real Short #define THInf SHRT_MAX #define TH_REAL_IS_SHORT #line 1 TH_GENERIC_FILE #include TH_GENERIC_FILE #undef real #undef accreal #undef Real #undef THInf #undef TH_REAL_IS_SHORT #undef TH_CONVERT_REAL_TO_ACCREAL #undef TH_CONVERT_ACCREAL_TO_REAL #ifndef THGenerateManyTypes #undef TH_GENERIC_FILE #endif lib/TH/THHalf.c000066400000000000000000000045131316246254300134220ustar00rootroot00000000000000#include "THHalf.h" /* Copyright 1993-2014 NVIDIA Corporation. All rights reserved. 
*/ THHalf TH_float2half(float f) { THHalf h; TH_float2halfbits(&f, &h.x); return h; } TH_API float TH_half2float(THHalf h) { float f; TH_halfbits2float(&h.x, &f); return f; } // Host functions for converting between FP32 and FP16 formats void TH_halfbits2float(unsigned short* src, float* res) { unsigned h = *src; unsigned sign = ((h >> 15) & 1); unsigned exponent = ((h >> 10) & 0x1f); unsigned mantissa = ((h & 0x3ff) << 13); if (exponent == 0x1f) { /* NaN or Inf */ mantissa = (mantissa ? (sign = 0, 0x7fffff) : 0); exponent = 0xff; } else if (!exponent) { /* Denorm or Zero */ if (mantissa) { unsigned int msb; exponent = 0x71; do { msb = (mantissa & 0x400000); mantissa <<= 1; /* normalize */ --exponent; } while (!msb); mantissa &= 0x7fffff; /* 1.mantissa is implicit */ } } else { exponent += 0x70; } *(unsigned*)res = ((sign << 31) | (exponent << 23) | mantissa); } void TH_float2halfbits(float* src, unsigned short* dest) { unsigned x = *(unsigned*)src; unsigned u = (x & 0x7fffffff), remainder, shift, lsb, lsb_s1, lsb_m1; unsigned sign, exponent, mantissa; // Get rid of +NaN/-NaN case first. if (u > 0x7f800000) { *dest = 0x7fffU; return ; } sign = ((x >> 16) & 0x8000); // Get rid of +Inf/-Inf, +0/-0. if (u > 0x477fefff) { *dest = sign | 0x7c00U; return; } if (u < 0x33000001) { *dest = (sign | 0x0000); return; } exponent = ((u >> 23) & 0xff); mantissa = (u & 0x7fffff); if (exponent > 0x70) { shift = 13; exponent -= 0x70; } else { shift = 0x7e - exponent; exponent = 0; mantissa |= 0x800000; } lsb = (1 << shift); lsb_s1 = (lsb >> 1); lsb_m1 = (lsb - 1); // Round to nearest even. 
remainder = (mantissa & lsb_m1); mantissa >>= shift; if (remainder > lsb_s1 || (remainder == lsb_s1 && (mantissa & 0x1))) { ++mantissa; if (!(mantissa & 0x3ff)) { ++exponent; mantissa = 0; } } *dest = (sign | (exponent << 10) | mantissa); } lib/TH/THHalf.h000066400000000000000000000015531316246254300134300ustar00rootroot00000000000000#ifndef TH_HALF_H #define TH_HALF_H #include "THGeneral.h" #include /* Neither built-in nor included from Cutorch, use our definition lifted from CUDA */ #if defined(__GNUC__) #define __thalign__(n) __attribute__((aligned(n))) #elif defined(_WIN32) #define __thalign__(n) __declspec(align(n)) #else #define __thalign__(n) #endif typedef struct __thalign__(2){ unsigned short x; } __THHalf; typedef struct __thalign__(4) { unsigned int x; } __THHalf2; typedef __THHalf THHalf; typedef __THHalf2 THHalf2; TH_API void TH_float2halfbits(float*, unsigned short*); TH_API void TH_halfbits2float(unsigned short*, float*); TH_API THHalf TH_float2half(float); TH_API float TH_half2float(THHalf); #ifndef TH_HALF_BITS_TO_LITERAL # define TH_HALF_BITS_TO_LITERAL(n) { n } #endif #define TH_HALF_ZERO 0x0U #define TH_HALF_INF 0x7C00U #undef __thalign__ #endif lib/TH/THLapack.c000066400000000000000000000001271316246254300137400ustar00rootroot00000000000000#include "THLapack.h" #include "generic/THLapack.c" #include "THGenerateFloatTypes.h" lib/TH/THLapack.h000066400000000000000000000016671316246254300137570ustar00rootroot00000000000000#ifndef TH_LAPACK_INC #define TH_LAPACK_INC #include "THGeneral.h" #define THLapack_(NAME) TH_CONCAT_4(TH,Real,Lapack_,NAME) #define THLapackCheck(fmt, func, info , ...) \ if (info < 0) { \ THError("Lapack Error in %s : Illegal Argument %d", func, -info); \ } else if(info > 0) { \ THError(fmt, func, info, ##__VA_ARGS__); \ } \ #define THLapackCheckWithCleanup(fmt, cleanup, func, info , ...) 
\ if (info < 0) { \ cleanup \ THError("Lapack Error in %s : Illegal Argument %d", func, -info); \ } else if(info > 0) { \ cleanup \ THError(fmt, func, info, ##__VA_ARGS__); \ } #include "generic/THLapack.h" #include "THGenerateAllTypes.h" #endif lib/TH/THLogAdd.c000066400000000000000000000033101316246254300136740ustar00rootroot00000000000000#include "THLogAdd.h" #include #ifdef USE_DOUBLE #define MINUS_LOG_THRESHOLD -39.14 #else #define MINUS_LOG_THRESHOLD -18.42 #endif const double THLog2Pi=1.83787706640934548355; const double THLogZero=-DBL_MAX; const double THLogOne=0; double THLogAdd(double log_a, double log_b) { double minusdif; if (log_a < log_b) { double tmp = log_a; log_a = log_b; log_b = tmp; } minusdif = log_b - log_a; #ifdef DEBUG if (isnan(minusdif)) THError("THLogAdd: minusdif (%f) log_b (%f) or log_a (%f) is nan", minusdif, log_b, log_a); #endif if (minusdif < MINUS_LOG_THRESHOLD) return log_a; else return log_a + log1p(exp(minusdif)); } double THLogSub(double log_a, double log_b) { double minusdif; if (log_a < log_b) THError("LogSub: log_a (%f) should be greater than log_b (%f)", log_a, log_b); minusdif = log_b - log_a; #ifdef DEBUG if (isnan(minusdif)) THError("LogSub: minusdif (%f) log_b (%f) or log_a (%f) is nan", minusdif, log_b, log_a); #endif if (log_a == log_b) return THLogZero; else if (minusdif < MINUS_LOG_THRESHOLD) return log_a; else return log_a + log1p(-exp(minusdif)); } /* Credits to Leon Bottou */ double THExpMinusApprox(const double x) { #define EXACT_EXPONENTIAL 0 #if EXACT_EXPONENTIAL return exp(-x); #else /* fast approximation of exp(-x) for x positive */ # define A0 (1.0) # define A1 (0.125) # define A2 (0.0078125) # define A3 (0.00032552083) # define A4 (1.0172526e-5) if (x < 13.0) { /* assert(x>=0); */ double y; y = A0+x*(A1+x*(A2+x*(A3+x*A4))); y *= y; y *= y; y *= y; y = 1/y; return y; } return 0; # undef A0 # undef A1 # undef A2 # undef A3 # undef A4 #endif } 
lib/TH/THLogAdd.h000066400000000000000000000005031316246254300137020ustar00rootroot00000000000000#ifndef TH_LOG_ADD_INC #define TH_LOG_ADD_INC #include "THGeneral.h" TH_API const double THLog2Pi; TH_API const double THLogZero; TH_API const double THLogOne; TH_API double THLogAdd(double log_a, double log_b); TH_API double THLogSub(double log_a, double log_b); TH_API double THExpMinusApprox(const double x); #endif lib/TH/THMath.h000066400000000000000000000012731316246254300134460ustar00rootroot00000000000000#ifndef _THMATH_H #define _THMATH_H static inline double TH_sigmoid(double value) { return 1.0 / (1.0 + exp(-value)); } static inline double TH_frac(double x) { return x - trunc(x); } static inline double TH_rsqrt(double x) { return 1.0 / sqrt(x); } static inline double TH_lerp(double a, double b, double weight) { return a + weight * (b-a); } static inline float TH_sigmoidf(float value) { return 1.0f / (1.0f + expf(-value)); } static inline float TH_fracf(float x) { return x - truncf(x); } static inline float TH_rsqrtf(float x) { return 1.0f / sqrtf(x); } static inline float TH_lerpf(float a, float b, float weight) { return a + weight * (b-a); } #endif // _THMATH_H lib/TH/THMemoryFile.c000066400000000000000000000632221316246254300146220ustar00rootroot00000000000000#include "THMemoryFile.h" #include "THFilePrivate.h" #include "stdint.h" typedef struct THMemoryFile__ { THFile file; THCharStorage *storage; size_t size; size_t position; int longSize; } THMemoryFile; static int THMemoryFile_isOpened(THFile *self) { THMemoryFile *mfself = (THMemoryFile*)self; return (mfself->storage != NULL); } static char *THMemoryFile_strnextspace(char *str_, char *c_) { char c; while( (c = *str_) ) { if( (c != ' ') && (c != '\n') && (c != ':') && (c != ';') ) break; str_++; } while( (c = *str_) ) { if( (c == ' ') || (c == '\n') || (c == ':') || (c == ';') ) { *c_ = c; *str_ = '\0'; return(str_); } str_++; } return NULL; } static void THMemoryFile_grow(THMemoryFile *self, size_t size) 
{ size_t missingSpace; if(size <= self->size) return; else { if(size < self->storage->size) /* note the "<" and not "<=" */ { self->size = size; self->storage->data[self->size] = '\0'; return; } } missingSpace = size-self->storage->size+1; /* +1 for the '\0' */ THCharStorage_resize(self->storage, (self->storage->size/2 > missingSpace ? self->storage->size + (self->storage->size/2) : self->storage->size + missingSpace)); } static int THMemoryFile_mode(const char *mode, int *isReadable, int *isWritable) { *isReadable = 0; *isWritable = 0; if(strlen(mode) == 1) { if(*mode == 'r') { *isReadable = 1; return 1; } else if(*mode == 'w') { *isWritable = 1; return 1; } } else if(strlen(mode) == 2) { if(mode[0] == 'r' && mode[1] == 'w') { *isReadable = 1; *isWritable = 1; return 1; } } return 0; } /********************************************************/ #define READ_WRITE_METHODS(TYPE, TYPEC, ASCII_READ_ELEM, ASCII_WRITE_ELEM, INSIDE_SPACING) \ static size_t THMemoryFile_read##TYPEC(THFile *self, TYPE *data, size_t n) \ { \ THMemoryFile *mfself = (THMemoryFile*)self; \ size_t nread = 0; \ \ THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); \ THArgCheck(mfself->file.isReadable, 1, "attempt to read in a write-only file"); \ \ if (n == 0) \ return 0; \ \ if(mfself->file.isBinary) \ { \ size_t nByte = sizeof(TYPE)*n; \ size_t nByteRemaining = (mfself->position + nByte <= mfself->size ? 
nByte : mfself->size-mfself->position); \ nread = nByteRemaining/sizeof(TYPE); \ memmove(data, mfself->storage->data+mfself->position, nread*sizeof(TYPE)); \ mfself->position += nread*sizeof(TYPE); \ } \ else \ { \ size_t i; \ for(i = 0; i < n; i++) \ { \ size_t nByteRead = 0; \ char spaceChar = 0; \ char *spacePtr = THMemoryFile_strnextspace(mfself->storage->data+mfself->position, &spaceChar); \ ASCII_READ_ELEM; \ if(ret == EOF) \ { \ while(mfself->storage->data[mfself->position]) \ mfself->position++; \ } \ else \ mfself->position += nByteRead; \ if(spacePtr) \ *spacePtr = spaceChar; \ } \ if(mfself->file.isAutoSpacing && (n > 0)) \ { \ if( (mfself->position < mfself->size) && (mfself->storage->data[mfself->position] == '\n') ) \ mfself->position++; \ } \ } \ \ if(nread != n) \ { \ mfself->file.hasError = 1; /* shouldn't we put hasError to 0 all the time ? */ \ if(!mfself->file.isQuiet) \ THError("read error: read %d blocks instead of %d", nread, n); \ } \ \ return nread; \ } \ \ static size_t THMemoryFile_write##TYPEC(THFile *self, TYPE *data, size_t n) \ { \ THMemoryFile *mfself = (THMemoryFile*)self; \ \ THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); \ THArgCheck(mfself->file.isWritable, 1, "attempt to write in a read-only file"); \ \ if (n == 0) \ return 0; \ \ if(mfself->file.isBinary) \ { \ size_t nByte = sizeof(TYPE)*n; \ THMemoryFile_grow(mfself, mfself->position+nByte); \ memmove(mfself->storage->data+mfself->position, data, nByte); \ mfself->position += nByte; \ if(mfself->position > mfself->size) \ { \ mfself->size = mfself->position; \ mfself->storage->data[mfself->size] = '\0'; \ } \ } \ else \ { \ size_t i; \ for(i = 0; i < n; i++) \ { \ ssize_t nByteWritten; \ while (1) \ { \ ASCII_WRITE_ELEM; \ if( (nByteWritten > -1) && (nByteWritten < mfself->storage->size-mfself->position) ) \ { \ mfself->position += nByteWritten; \ break; \ } \ THMemoryFile_grow(mfself, mfself->storage->size + (mfself->storage->size/2) + 2); \ } \ 
if(mfself->file.isAutoSpacing) \ { \ if(i < n-1) \ { \ THMemoryFile_grow(mfself, mfself->position+1); \ sprintf(mfself->storage->data+mfself->position, " "); \ mfself->position++; \ } \ if(i == n-1) \ { \ THMemoryFile_grow(mfself, mfself->position+1); \ sprintf(mfself->storage->data+mfself->position, "\n"); \ mfself->position++; \ } \ } \ } \ if(mfself->position > mfself->size) \ { \ mfself->size = mfself->position; \ mfself->storage->data[mfself->size] = '\0'; \ } \ } \ \ return n; \ } void THMemoryFile_longSize(THFile *self, int size) { THMemoryFile *dfself = (THMemoryFile*)(self); THArgCheck(size == 0 || size == 4 || size == 8, 1, "Invalid long size specified"); dfself->longSize = size; } THCharStorage *THMemoryFile_storage(THFile *self) { THMemoryFile *mfself = (THMemoryFile*)self; THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); THCharStorage_resize(mfself->storage, mfself->size+1); return mfself->storage; } static void THMemoryFile_synchronize(THFile *self) { THMemoryFile *mfself = (THMemoryFile*)self; THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); } static void THMemoryFile_seek(THFile *self, size_t position) { THMemoryFile *mfself = (THMemoryFile*)self; THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); THArgCheck(position >= 0, 2, "position must be positive"); if(position <= mfself->size) mfself->position = position; else { mfself->file.hasError = 1; if(!mfself->file.isQuiet) THError("unable to seek at position %zu", position); } } static void THMemoryFile_seekEnd(THFile *self) { THMemoryFile *mfself = (THMemoryFile*)self; THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); mfself->position = mfself->size; } static size_t THMemoryFile_position(THFile *self) { THMemoryFile *mfself = (THMemoryFile*)self; THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); return mfself->position; } static void THMemoryFile_close(THFile *self) { THMemoryFile *mfself = 
(THMemoryFile*)self; THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); THCharStorage_free(mfself->storage); mfself->storage = NULL; } static void THMemoryFile_free(THFile *self) { THMemoryFile *mfself = (THMemoryFile*)self; if(mfself->storage) THCharStorage_free(mfself->storage); THFree(mfself); } /* READ_WRITE_METHODS(bool, Bool, */ /* int value = 0; int ret = sscanf(mfself->storage->data+mfself->position, "%d%n", &value, &nByteRead); data[i] = (value ? 1 : 0), */ /* int value = (data[i] ? 1 : 0); nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%d", value), */ /* 1) */ READ_WRITE_METHODS(unsigned char, Byte, size_t ret = (mfself->position + n <= mfself->size ? n : mfself->size-mfself->position); \ if(spacePtr) *spacePtr = spaceChar; \ nByteRead = ret; \ nread = ret; \ i = n-1; \ memmove(data, mfself->storage->data+mfself->position, nByteRead), nByteWritten = (n < mfself->storage->size-mfself->position ? n : -1); \ i = n-1; \ if(nByteWritten > -1) memmove(mfself->storage->data+mfself->position, data, nByteWritten), 0) /* DEBUG: we should check if %n is count or not as a element (so ret might need to be ret-- on some systems) */ /* Note that we do a trick for char */ READ_WRITE_METHODS(char, Char, size_t ret = (mfself->position + n <= mfself->size ? n : mfself->size-mfself->position); \ if(spacePtr) *spacePtr = spaceChar; \ nByteRead = ret; \ nread = ret; \ i = n-1; \ memmove(data, mfself->storage->data+mfself->position, nByteRead), nByteWritten = (n < mfself->storage->size-mfself->position ? 
n : -1); \ i = n-1; \ if(nByteWritten > -1) memmove(mfself->storage->data+mfself->position, data, nByteWritten), 0) READ_WRITE_METHODS(short, Short, int nByteRead_; int ret = sscanf(mfself->storage->data+mfself->position, "%hd%n", &data[i], &nByteRead_); nByteRead = nByteRead_; if(ret <= 0) break; else nread++, nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%hd", data[i]), 1) READ_WRITE_METHODS(int, Int, int nByteRead_; int ret = sscanf(mfself->storage->data+mfself->position, "%d%n", &data[i], &nByteRead_); nByteRead = nByteRead_; if(ret <= 0) break; else nread++, nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%d", data[i]), 1) READ_WRITE_METHODS(float, Float, int nByteRead_; int ret = sscanf(mfself->storage->data+mfself->position, "%g%n", &data[i], &nByteRead_); nByteRead = nByteRead_; if(ret <= 0) break; else nread++, nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%.9g", data[i]), 1) READ_WRITE_METHODS(THHalf, Half, int nByteRead_; float buf; \ int ret = sscanf(mfself->storage->data+mfself->position, "%g%n", &buf, &nByteRead_); \ data[i] = TH_float2half(buf); nByteRead = nByteRead_; if(ret <= 0) break; else nread++, nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%.9g", TH_half2float(data[i])), 1) READ_WRITE_METHODS(double, Double, int nByteRead_; int ret = sscanf(mfself->storage->data+mfself->position, "%lg%n", &data[i], &nByteRead_); nByteRead = nByteRead_; if(ret <= 0) break; else nread++, nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%.17g", data[i]), 1) int THDiskFile_isLittleEndianCPU(void); static size_t THMemoryFile_readLong(THFile *self, long *data, size_t n) { THMemoryFile *mfself = (THMemoryFile*)self; size_t nread = 0L; THArgCheck(mfself->storage != NULL, 1, "attempt 
to use a closed file"); THArgCheck(mfself->file.isReadable, 1, "attempt to read in a write-only file"); if (n == 0) return 0; if(mfself->file.isBinary) { if(mfself->longSize == 0 || mfself->longSize == sizeof(long)) { size_t nByte = sizeof(long)*n; size_t nByteRemaining = (mfself->position + nByte <= mfself->size ? nByte : mfself->size-mfself->position); nread = nByteRemaining/sizeof(long); memmove(data, mfself->storage->data+mfself->position, nread*sizeof(long)); mfself->position += nread*sizeof(long); } else if(mfself->longSize == 4) { size_t nByte = 4*n; size_t nByteRemaining = (mfself->position + nByte <= mfself->size ? nByte : mfself->size-mfself->position); int32_t *storage = (int32_t *)(mfself->storage->data + mfself->position); nread = nByteRemaining/4; size_t i; for(i = 0; i < nread; i++) data[i] = storage[i]; mfself->position += nread*4; } else /* if(mfself->longSize == 8) */ { int big_endian = !THDiskFile_isLittleEndianCPU(); size_t nByte = 8*n; int32_t *storage = (int32_t *)(mfself->storage->data + mfself->position); size_t nByteRemaining = (mfself->position + nByte <= mfself->size ? 
nByte : mfself->size-mfself->position); nread = nByteRemaining/8; size_t i; for(i = 0; i < nread; i++) data[i] = storage[2*i + big_endian]; mfself->position += nread*8; } } else { size_t i; for(i = 0; i < n; i++) { size_t nByteRead = 0; char spaceChar = 0; char *spacePtr = THMemoryFile_strnextspace(mfself->storage->data+mfself->position, &spaceChar); int nByteRead_; int ret = sscanf(mfself->storage->data+mfself->position, "%ld%n", &data[i], &nByteRead_); nByteRead = nByteRead_; if(ret <= 0) break; else nread++; if(ret == EOF) { while(mfself->storage->data[mfself->position]) mfself->position++; } else mfself->position += nByteRead; if(spacePtr) *spacePtr = spaceChar; } if(mfself->file.isAutoSpacing && (n > 0)) { if( (mfself->position < mfself->size) && (mfself->storage->data[mfself->position] == '\n') ) mfself->position++; } } if(nread != n) { mfself->file.hasError = 1; /* shouldn't we put hasError to 0 all the time ? */ if(!mfself->file.isQuiet) THError("read error: read %d blocks instead of %d", nread, n); } return nread; } static size_t THMemoryFile_writeLong(THFile *self, long *data, size_t n) { THMemoryFile *mfself = (THMemoryFile*)self; THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); THArgCheck(mfself->file.isWritable, 1, "attempt to write in a read-only file"); if (n == 0) return 0; if(mfself->file.isBinary) { if(mfself->longSize == 0 || mfself->longSize == sizeof(long)) { size_t nByte = sizeof(long)*n; THMemoryFile_grow(mfself, mfself->position+nByte); memmove(mfself->storage->data+mfself->position, data, nByte); mfself->position += nByte; } else if(mfself->longSize == 4) { size_t nByte = 4*n; THMemoryFile_grow(mfself, mfself->position+nByte); int32_t *storage = (int32_t *)(mfself->storage->data + mfself->position); size_t i; for(i = 0; i < n; i++) storage[i] = data[i]; mfself->position += nByte; } else /* if(mfself->longSize == 8) */ { int big_endian = !THDiskFile_isLittleEndianCPU(); size_t nByte = 8*n; THMemoryFile_grow(mfself, 
mfself->position+nByte); int32_t *storage = (int32_t *)(mfself->storage->data + mfself->position); size_t i; for(i = 0; i < n; i++) { storage[2*i + !big_endian] = 0; storage[2*i + big_endian] = data[i]; } mfself->position += nByte; } if(mfself->position > mfself->size) { mfself->size = mfself->position; mfself->storage->data[mfself->size] = '\0'; } } else { size_t i; for(i = 0; i < n; i++) { ssize_t nByteWritten; while (1) { nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%ld", data[i]); if( (nByteWritten > -1) && (nByteWritten < mfself->storage->size-mfself->position) ) { mfself->position += nByteWritten; break; } THMemoryFile_grow(mfself, mfself->storage->size + (mfself->storage->size/2) + 2); } if(mfself->file.isAutoSpacing) { if(i < n-1) { THMemoryFile_grow(mfself, mfself->position+1); sprintf(mfself->storage->data+mfself->position, " "); mfself->position++; } if(i == n-1) { THMemoryFile_grow(mfself, mfself->position+1); sprintf(mfself->storage->data+mfself->position, "\n"); mfself->position++; } } } if(mfself->position > mfself->size) { mfself->size = mfself->position; mfself->storage->data[mfself->size] = '\0'; } } return n; } static char* THMemoryFile_cloneString(const char *str, ptrdiff_t size) { char *cstr = THAlloc(size); memcpy(cstr, str, size); return cstr; } static size_t THMemoryFile_readString(THFile *self, const char *format, char **str_) { THMemoryFile *mfself = (THMemoryFile*)self; THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); THArgCheck(mfself->file.isReadable, 1, "attempt to read in a write-only file"); THArgCheck((strlen(format) >= 2 ? (format[0] == '*') && (format[1] == 'a' || format[1] == 'l') : 0), 2, "format must be '*a' or '*l'"); if(mfself->position == mfself->size) /* eof ? 
*/ { mfself->file.hasError = 1; if(!mfself->file.isQuiet) THError("read error: read 0 blocks instead of 1"); *str_ = NULL; return 0; } if(format[1] == 'a') { size_t str_size = mfself->size-mfself->position; *str_ = THMemoryFile_cloneString(mfself->storage->data+mfself->position, str_size); mfself->position = mfself->size; return str_size; } else { char *p = mfself->storage->data+mfself->position; int eolFound = 0; size_t posEol; size_t i; for(i = 0; i < mfself->size-mfself->position; i++) { if(p[i] == '\n') { posEol = i; eolFound = 1; break; } } if(eolFound) { *str_ = THMemoryFile_cloneString(mfself->storage->data+mfself->position, posEol); mfself->position += posEol+1; return posEol; } else /* well, we read all! */ { size_t str_size = mfself->size-mfself->position; *str_ = THMemoryFile_cloneString(mfself->storage->data+mfself->position, str_size); mfself->position = mfself->size; return str_size; } } *str_ = NULL; return 0; } static size_t THMemoryFile_writeString(THFile *self, const char *str, size_t size) { THMemoryFile *mfself = (THMemoryFile*)self; THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); THArgCheck(mfself->file.isWritable, 1, "attempt to write in a read-only file"); THMemoryFile_grow(mfself, mfself->position+size); memmove(mfself->storage->data+mfself->position, str, size); mfself->position += size; if(mfself->position > mfself->size) { mfself->size = mfself->position; mfself->storage->data[mfself->size] = '\0'; } return size; } THFile *THMemoryFile_newWithStorage(THCharStorage *storage, const char *mode) { static struct THFileVTable vtable = { THMemoryFile_isOpened, THMemoryFile_readByte, THMemoryFile_readChar, THMemoryFile_readShort, THMemoryFile_readInt, THMemoryFile_readLong, THMemoryFile_readFloat, THMemoryFile_readDouble, THMemoryFile_readHalf, THMemoryFile_readString, THMemoryFile_writeByte, THMemoryFile_writeChar, THMemoryFile_writeShort, THMemoryFile_writeInt, THMemoryFile_writeLong, THMemoryFile_writeFloat, 
THMemoryFile_writeDouble, THMemoryFile_writeHalf, THMemoryFile_writeString, THMemoryFile_synchronize, THMemoryFile_seek, THMemoryFile_seekEnd, THMemoryFile_position, THMemoryFile_close, THMemoryFile_free }; THMemoryFile *mfself; int isReadable; int isWritable; if(storage) { THArgCheck(storage->data[storage->size-1] == '\0', 1, "provided CharStorage must be terminated by 0"); THArgCheck(THMemoryFile_mode(mode, &isReadable, &isWritable), 2, "file mode should be 'r','w' or 'rw'"); THCharStorage_retain(storage); } else { THArgCheck(THMemoryFile_mode(mode, &isReadable, &isWritable), 2, "file mode should be 'r','w' or 'rw'"); storage = THCharStorage_newWithSize(1); storage->data[0] = '\0'; } mfself = THAlloc(sizeof(THMemoryFile)); mfself->storage = storage; mfself->size = (storage ? storage->size-1 : 0); mfself->position = 0; mfself->longSize = 0; mfself->file.vtable = &vtable; mfself->file.isQuiet = 0; mfself->file.isReadable = isReadable; mfself->file.isWritable = isWritable; mfself->file.isBinary = 0; mfself->file.isAutoSpacing = 1; mfself->file.hasError = 0; return (THFile*)mfself; } THFile *THMemoryFile_new(const char *mode) { return THMemoryFile_newWithStorage(NULL, mode); } lib/TH/THMemoryFile.h000066400000000000000000000005521316246254300146240ustar00rootroot00000000000000#ifndef TH_MEMORY_FILE_INC #define TH_MEMORY_FILE_INC #include "THFile.h" #include "THStorage.h" TH_API THFile *THMemoryFile_newWithStorage(THCharStorage *storage, const char *mode); TH_API THFile *THMemoryFile_new(const char *mode); TH_API THCharStorage *THMemoryFile_storage(THFile *self); TH_API void THMemoryFile_longSize(THFile *self, int size); #endif lib/TH/THRandom.c000066400000000000000000000200411316246254300137620ustar00rootroot00000000000000#include "THGeneral.h" #include "THRandom.h" #ifndef _WIN32 #include #include #endif /* Code for the Mersenne Twister random generator.... 
*/ #define n _MERSENNE_STATE_N #define m _MERSENNE_STATE_M /* Creates (unseeded) new generator*/ static THGenerator* THGenerator_newUnseeded() { THGenerator *self = THAlloc(sizeof(THGenerator)); memset(self, 0, sizeof(THGenerator)); self->left = 1; self->seeded = 0; self->normal_is_valid = 0; return self; } /* Creates new generator and makes sure it is seeded*/ THGenerator* THGenerator_new() { THGenerator *self = THGenerator_newUnseeded(); THRandom_seed(self); return self; } THGenerator* THGenerator_copy(THGenerator *self, THGenerator *from) { memcpy(self, from, sizeof(THGenerator)); return self; } void THGenerator_free(THGenerator *self) { THFree(self); } int THGenerator_isValid(THGenerator *_generator) { if ((_generator->seeded == 1) && (_generator->left > 0 && _generator->left <= n) && (_generator->next <= n)) return 1; return 0; } #ifndef _WIN32 static unsigned long readURandomLong() { int randDev = open("/dev/urandom", O_RDONLY); unsigned long randValue; if (randDev < 0) { THError("Unable to open /dev/urandom"); } ssize_t readBytes = read(randDev, &randValue, sizeof(randValue)); if (readBytes < sizeof(randValue)) { THError("Unable to read from /dev/urandom"); } close(randDev); return randValue; } #endif // _WIN32 unsigned long THRandom_seed(THGenerator *_generator) { #ifdef _WIN32 unsigned long s = (unsigned long)time(0); #else unsigned long s = readURandomLong(); #endif THRandom_manualSeed(_generator, s); return s; } /* The next 4 methods are taken from http:www.math.keio.ac.jpmatumotoemt.html Here is the copyright: Some minor modifications have been made to adapt to "my" C... */ /* A C-program for MT19937, with initialization improved 2002/2/10. Coded by Takuji Nishimura and Makoto Matsumoto. This is a faster version by taking Shawn Cokus's optimization, Matthe Bellew's simplification, Isaku Wada's double version. Before using, initialize the state by using init_genrand(seed) or init_by_array(init_key, key_length). 
Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The names of its contributors may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Any feedback is very welcome. http://www.math.keio.ac.jp/matumoto/emt.html email: matumoto@math.keio.ac.jp */ /* Macros for the Mersenne Twister random generator... */ /* Period parameters */ /* #define n 624 */ /* #define m 397 */ #define MATRIX_A 0x9908b0dfUL /* constant vector a */ #define UMASK 0x80000000UL /* most significant w-r bits */ #define LMASK 0x7fffffffUL /* least significant r bits */ #define MIXBITS(u,v) ( ((u) & UMASK) | ((v) & LMASK) ) #define TWIST(u,v) ((MIXBITS(u,v) >> 1) ^ ((v)&1UL ? 
MATRIX_A : 0UL)) /*********************************************************** That's it. */ void THRandom_manualSeed(THGenerator *_generator, unsigned long the_seed_) { int j; /* This ensures reseeding resets all of the state (i.e. state for Gaussian numbers) */ THGenerator *blank = THGenerator_newUnseeded(); THGenerator_copy(_generator, blank); THGenerator_free(blank); _generator->the_initial_seed = the_seed_; _generator->state[0] = _generator->the_initial_seed & 0xffffffffUL; for(j = 1; j < n; j++) { _generator->state[j] = (1812433253UL * (_generator->state[j-1] ^ (_generator->state[j-1] >> 30)) + j); /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ /* In the previous versions, mSBs of the seed affect */ /* only mSBs of the array state[]. */ /* 2002/01/09 modified by makoto matsumoto */ _generator->state[j] &= 0xffffffffUL; /* for >32 bit machines */ } _generator->left = 1; _generator->seeded = 1; } unsigned long THRandom_initialSeed(THGenerator *_generator) { return _generator->the_initial_seed; } void THRandom_nextState(THGenerator *_generator) { unsigned long *p = _generator->state; int j; _generator->left = n; _generator->next = 0; for(j = n-m+1; --j; p++) *p = p[m] ^ TWIST(p[0], p[1]); for(j = m; --j; p++) *p = p[m-n] ^ TWIST(p[0], p[1]); *p = p[m-n] ^ TWIST(p[0], _generator->state[0]); } unsigned long THRandom_random(THGenerator *_generator) { unsigned long y; if (--(_generator->left) == 0) THRandom_nextState(_generator); y = *(_generator->state + (_generator->next)++); /* Tempering */ y ^= (y >> 11); y ^= (y << 7) & 0x9d2c5680UL; y ^= (y << 15) & 0xefc60000UL; y ^= (y >> 18); return y; } /* generates a random number on [0,1)-double-interval */ static double __uniform__(THGenerator *_generator) { /* divided by 2^32 */ return (double)THRandom_random(_generator) * (1.0/4294967296.0); } /********************************************************* Thanks *a lot* Takuji Nishimura and Makoto Matsumoto! Now my own code... 
*********************************************************/ double THRandom_uniform(THGenerator *_generator, double a, double b) { return(__uniform__(_generator) * (b - a) + a); } double THRandom_normal(THGenerator *_generator, double mean, double stdv) { THArgCheck(stdv > 0, 2, "standard deviation must be strictly positive"); /* This is known as the Box-Muller method */ if(!_generator->normal_is_valid) { _generator->normal_x = __uniform__(_generator); _generator->normal_y = __uniform__(_generator); _generator->normal_rho = sqrt(-2. * log(1.0-_generator->normal_y)); _generator->normal_is_valid = 1; } else _generator->normal_is_valid = 0; if(_generator->normal_is_valid) return _generator->normal_rho*cos(2.*M_PI*_generator->normal_x)*stdv+mean; else return _generator->normal_rho*sin(2.*M_PI*_generator->normal_x)*stdv+mean; } double THRandom_exponential(THGenerator *_generator, double lambda) { return(-1. / lambda * log(1-__uniform__(_generator))); } double THRandom_cauchy(THGenerator *_generator, double median, double sigma) { return(median + sigma * tan(M_PI*(__uniform__(_generator)-0.5))); } /* Faut etre malade pour utiliser ca. M'enfin. 
*/ double THRandom_logNormal(THGenerator *_generator, double mean, double stdv) { THArgCheck(stdv > 0, 2, "standard deviation must be strictly positive"); return(exp(THRandom_normal(_generator, mean, stdv))); } int THRandom_geometric(THGenerator *_generator, double p) { THArgCheck(p > 0 && p < 1, 1, "must be > 0 and < 1"); return((int)(log(1-__uniform__(_generator)) / log(p)) + 1); } int THRandom_bernoulli(THGenerator *_generator, double p) { THArgCheck(p >= 0 && p <= 1, 1, "must be >= 0 and <= 1"); return(__uniform__(_generator) <= p); } lib/TH/THRandom.h000066400000000000000000000056661316246254300140070ustar00rootroot00000000000000#ifndef TH_RANDOM_INC #define TH_RANDOM_INC #include "THGeneral.h" #define _MERSENNE_STATE_N 624 #define _MERSENNE_STATE_M 397 /* A THGenerator contains all the state required for a single random number stream */ typedef struct THGenerator { /* The initial seed. */ unsigned long the_initial_seed; int left; /* = 1; */ int seeded; /* = 0; */ unsigned long next; unsigned long state[_MERSENNE_STATE_N]; /* the array for the state vector */ /********************************/ /* For normal distribution */ double normal_x; double normal_y; double normal_rho; int normal_is_valid; /* = 0; */ } THGenerator; #define torch_Generator "torch.Generator" /* Manipulate THGenerator objects */ TH_API THGenerator * THGenerator_new(void); TH_API THGenerator * THGenerator_copy(THGenerator *self, THGenerator *from); TH_API void THGenerator_free(THGenerator *gen); /* Checks if given generator is valid */ TH_API int THGenerator_isValid(THGenerator *_generator); /* Initializes the random number generator from /dev/urandom (or on Windows platforms with the current time (granularity: seconds)) and returns the seed. */ TH_API unsigned long THRandom_seed(THGenerator *_generator); /* Initializes the random number generator with the given long "the_seed_". 
*/ TH_API void THRandom_manualSeed(THGenerator *_generator, unsigned long the_seed_); /* Returns the starting seed used. */ TH_API unsigned long THRandom_initialSeed(THGenerator *_generator); /* Generates a uniform 32 bits integer. */ TH_API unsigned long THRandom_random(THGenerator *_generator); /* Generates a uniform random number on [0,1[. */ TH_API double THRandom_uniform(THGenerator *_generator, double a, double b); /** Generates a random number from a normal distribution. (With mean #mean# and standard deviation #stdv >= 0#). */ TH_API double THRandom_normal(THGenerator *_generator, double mean, double stdv); /** Generates a random number from an exponential distribution. The density is $p(x) = lambda * exp(-lambda * x)$, where lambda is a positive number. */ TH_API double THRandom_exponential(THGenerator *_generator, double lambda); /** Returns a random number from a Cauchy distribution. The Cauchy density is $p(x) = sigma/(pi*(sigma^2 + (x-median)^2))$ */ TH_API double THRandom_cauchy(THGenerator *_generator, double median, double sigma); /** Generates a random number from a log-normal distribution. (#mean > 0# is the mean of the log-normal distribution and #stdv# is its standard deviation). */ TH_API double THRandom_logNormal(THGenerator *_generator, double mean, double stdv); /** Generates a random number from a geometric distribution. It returns an integer #i#, where $p(i) = (1-p) * p^(i-1)$. p must satisfy $0 < p < 1$. */ TH_API int THRandom_geometric(THGenerator *_generator, double p); /* Returns true with probability $p$ and false with probability $1-p$ (p > 0). 
*/ TH_API int THRandom_bernoulli(THGenerator *_generator, double p); #endif lib/TH/THSize.c000066400000000000000000000007221316246254300134600ustar00rootroot00000000000000#include "THSize.h" int THSize_isSameSizeAs(const long *sizeA, long dimsA, const long *sizeB, long dimsB) { int d; if (dimsA != dimsB) return 0; for(d = 0; d < dimsA; ++d) { if(sizeA[d] != sizeB[d]) return 0; } return 1; } ptrdiff_t THSize_nElement(long dims, long *size) { if(dims == 0) return 0; else { ptrdiff_t nElement = 1; int d; for(d = 0; d < dims; d++) nElement *= size[d]; return nElement; } } lib/TH/THSize.h000066400000000000000000000006341316246254300134670ustar00rootroot00000000000000#ifndef TH_SIZE_INC #define TH_SIZE_INC #include "THGeneral.h" #include // THTensor functions that would work on a THSize if we had such a class in C++, // i.e. THTensor functions that depend only on the shape of the tensor, not the type. TH_API int THSize_isSameSizeAs(const long *sizeA, long dimsA, const long *sizeB, long dimsB); TH_API ptrdiff_t THSize_nElement(long dims, long *size); #endif lib/TH/THStorage.c000066400000000000000000000133221316246254300141520ustar00rootroot00000000000000#include "THAtomic.h" #include "THStorage.h" #include "generic/THStorage.c" #include "THGenerateAllTypes.h" #include "generic/THStorage.c" #include "THGenerateHalfType.h" #include "generic/THStorageCopy.c" #include "THGenerateAllTypes.h" #include "generic/THStorageCopy.c" #include "THGenerateHalfType.h" THDescBuff THLongStorage_sizeDesc(const THLongStorage *size) { return _THSizeDesc(size->data, size->size); } THLongStorage *THLongStorage_newInferSize(THLongStorage *size, ptrdiff_t nElement) { ptrdiff_t total_size = (size->size > 0 ? 
1 : 0); ptrdiff_t dim_infer = -1; ptrdiff_t i; for (i = 0; i < size->size; i++) { if (size->data[i] == -1) { THArgCheck(dim_infer == -1, 1, "only one dimension can be inferred"); dim_infer = i; } else { total_size *= size->data[i]; } } if (dim_infer != -1) { THDescBuff buf = THLongStorage_sizeDesc(size); THArgCheck(total_size > 0 && nElement % total_size == 0, 2, "size '%s' is invalid for input with %td elements", buf.str, nElement); } else { THDescBuff buf = THLongStorage_sizeDesc(size); THArgCheck(nElement == total_size, 2, "size '%s' is invalid for input with %td elements", buf.str, nElement); } THLongStorage* copy = THLongStorage_newWithSize(size->size); THLongStorage_copy(copy, size); if (dim_infer != -1) { copy->data[dim_infer] = nElement / total_size; } return copy; } int THLongStorage_inferSize2(THLongStorage *output, long *sizesA, long dimsA, long *sizesB, long dimsB, char *error_buffer, int buffer_len) { THArgCheck(sizesA != NULL, 1, "sizesA must not be null"); THArgCheck(sizesB != NULL, 2, "sizesB must not be null"); THArgCheck(dimsA, 1, "Can't expand empty tensor a"); THArgCheck(dimsB, 1, "Can't expand empty tensor b"); ptrdiff_t ndim = dimsA > dimsB ? dimsA : dimsB; long *expandedSizes = THAlloc(sizeof(long)*ndim); for (long i = ndim - 1; i >= 0; --i) { long offset = ndim - 1 - i; long dimA = dimsA - 1 - offset; long dimB = dimsB - 1 - offset; long sizeA = (dimA >= 0) ? sizesA[dimA] : 1; long sizeB = (dimB >= 0) ? 
sizesB[dimB] : 1; if (sizeA == sizeB || sizeA == 1 || sizeB == 1) { expandedSizes[i] = THMax(sizeA, sizeB); } else { THFree(expandedSizes); snprintf(error_buffer, buffer_len, "The size of tensor a (%ld) must match the size of tensor b (%ld) at " "non-singleton dimension %ld.", sizeA, sizeB, i); return -1; } } THLongStorage_resize(output, ndim); memcpy(THLongStorage_data(output), expandedSizes, sizeof(long)*ndim); THFree(expandedSizes); return 0; } int THLongStorage_inferSizeN(THLongStorage *output, int n, long **sizes, long *dims, char *error_buffer, int buffer_len) { THArgCheck(n > 0, 2, "n must be greater than 0"); THArgCheck(sizes != NULL, 1, "sizes must not be null"); THArgCheck(dims != NULL, 1, "dims must not be null"); ptrdiff_t ndim = 0; for (int j = 0; j < n; ++j) { THArgCheck(sizes[ j ] != NULL, 1, "size %d must not be null", j); THArgCheck(dims[ j ], 1, "Can't expand empty tensor %d", j); ndim = dims[ j ] > ndim ? dims[ j ] : ndim; } long *expandedSizes = THAlloc(sizeof(long)*ndim); for (long i = ndim - 1; i >= 0; --i) { expandedSizes[ i ] = 1; long offset = ndim - 1 - i; for (int j = 0; j < n; ++j) { long dim = dims[ j ] - 1 - offset; long size = (dim >= 0) ? 
sizes[ j ][ dim ] : 1; if (size == expandedSizes[ i ] || size == 1 || expandedSizes[ i ] == 1) { expandedSizes[ i ] = THMax(expandedSizes[ i ], size); } else { THFree(expandedSizes); snprintf(error_buffer, buffer_len, "The size of tensor %i (%ld) must match the expanded size" "of tensor (%ld) at non-singleton dimension %ld.", j, size, expandedSizes[ i ], i); return -1; } } } THLongStorage_resize(output, ndim); memcpy(THLongStorage_data(output), expandedSizes, sizeof(long)*ndim); THFree(expandedSizes); return 0; } int THLongStorage_inferExpandGeometry(long *tensorSizes, long *tensorStrides, long tensorDim, THLongStorage *sizes, long **expandedSizes, long **expandedStrides, char *error_buffer, int buffer_len) { ptrdiff_t ndim = THLongStorage_size(sizes); long *expandedSizesCalc = THAlloc(sizeof(long)*ndim); long *expandedStridesCalc = THAlloc(sizeof(long)*ndim); // create a new geometry for the tensors for (long i = ndim - 1; i >= 0; --i) { long offset = ndim - 1 - i; long dim = tensorDim - 1 - offset; long size = (dim >= 0) ? tensorSizes[dim] : 1; long stride = (dim >= 0) ? 
tensorStrides[dim] : expandedSizesCalc[i + 1] * expandedStridesCalc[i+1]; long targetSize = THLongStorage_data(sizes)[i]; if (targetSize == -1) { if (dim < 0) { THFree(expandedSizesCalc); THFree(expandedStridesCalc); snprintf(error_buffer, buffer_len, "The expanded size of the tensor (%ld) isn't allowed in a leading, non-existing dimension %ld.", targetSize, i); return -1; } else { targetSize = size; } } if (size != targetSize) { if (size == 1) { size = targetSize; stride = 0; } else { THFree(expandedSizesCalc); THFree(expandedStridesCalc); snprintf(error_buffer, buffer_len, "The expanded size of the tensor (%ld) must match the existing size (%ld) at " "non-singleton dimension %ld.", targetSize, size, i); return -1; } } expandedSizesCalc[i] = size; expandedStridesCalc[i] = stride; } *expandedSizes = expandedSizesCalc; *expandedStrides = expandedStridesCalc; return 0; } lib/TH/THStorage.h000066400000000000000000000030221316246254300141530ustar00rootroot00000000000000#ifndef TH_STORAGE_INC #define TH_STORAGE_INC #include "THGeneral.h" #include "THAllocator.h" #define THStorage TH_CONCAT_3(TH,Real,Storage) #define THStorage_(NAME) TH_CONCAT_4(TH,Real,Storage_,NAME) /* fast access methods */ #define TH_STORAGE_GET(storage, idx) ((storage)->data[(idx)]) #define TH_STORAGE_SET(storage, idx, value) ((storage)->data[(idx)] = (value)) #include "generic/THStorage.h" #include "THGenerateAllTypes.h" #include "generic/THStorage.h" #include "THGenerateHalfType.h" #include "generic/THStorageCopy.h" #include "THGenerateAllTypes.h" #include "generic/THStorageCopy.h" #include "THGenerateHalfType.h" TH_API THDescBuff THLongStorage_sizeDesc(const THLongStorage *size); TH_API THLongStorage *THLongStorage_newInferSize(THLongStorage *size, ptrdiff_t nElement); // Given the sizes of {2,N} tensors, write out the size when the tensors are expanded together. 
TH_API int THLongStorage_inferSize2(THLongStorage *output, long *sizesA, long dimsA, long *sizesB, long dimsB, char *error_buffer, int buffer_len); TH_API int THLongStorage_inferSizeN(THLongStorage *output, int n, long **sizes, long *dims, char *error_buffer, int buffer_len); TH_API int THLongStorage_inferExpandGeometry(long *tensorSizes, long *tensorStrides, long tensorDim, THLongStorage *sizes, long **expandedSizes, long **expandedStrides, char *error_buffer, int buffer_len); #endif lib/TH/THTensor.c000066400000000000000000000013521316246254300140200ustar00rootroot00000000000000#include "THAtomic.h" #include "THTensor.h" #include "THVector.h" #include "generic/simd/simd.h" #include "THBlas.h" #include "THLapack.h" #include "THRandom.h" #include "THTensorDimApply.h" #include "THMath.h" #include "generic/THTensor.c" #include "THGenerateAllTypes.h" #include "generic/THTensor.c" #include "THGenerateHalfType.h" #include "generic/THTensorCopy.c" #include "THGenerateAllTypes.h" #include "generic/THTensorCopy.c" #include "THGenerateHalfType.h" #include "generic/THTensorRandom.c" #include "THGenerateAllTypes.h" #include "generic/THTensorMath.c" #include "THGenerateAllTypes.h" #include "generic/THTensorConv.c" #include "THGenerateAllTypes.h" #include "generic/THTensorLapack.c" #include "THGenerateFloatTypes.h" lib/TH/THTensor.h000066400000000000000000000015711316246254300140300ustar00rootroot00000000000000#ifndef TH_TENSOR_INC #define TH_TENSOR_INC #include "THStorage.h" #include "THTensorApply.h" #define THTensor TH_CONCAT_3(TH,Real,Tensor) #define THTensor_(NAME) TH_CONCAT_4(TH,Real,Tensor_,NAME) /* basics */ #include "generic/THTensor.h" #include "THGenerateAllTypes.h" #include "generic/THTensor.h" #include "THGenerateHalfType.h" #include "generic/THTensorCopy.h" #include "THGenerateAllTypes.h" #include "generic/THTensorCopy.h" #include "THGenerateHalfType.h" #include "THTensorMacros.h" /* random numbers */ #include "THRandom.h" #include "generic/THTensorRandom.h" 
#include "THGenerateAllTypes.h" /* maths */ #include "generic/THTensorMath.h" #include "THGenerateAllTypes.h" /* convolutions */ #include "generic/THTensorConv.h" #include "THGenerateAllTypes.h" /* lapack support */ #include "generic/THTensorLapack.h" #include "THGenerateFloatTypes.h" #endif lib/TH/THTensorApply.h000066400000000000000000000246721316246254300150450ustar00rootroot00000000000000#ifndef TH_TENSOR_APPLY_INC #define TH_TENSOR_APPLY_INC /* * The basic strategy for apply is as follows: * * 1. Starting with the outermost index, loop until we reach a dimension where the * data is no longer contiguous, i.e. the stride at that dimension is not equal to * the size of the tensor defined by the outer dimensions. Let's call this outer * (contiguous) tensor A. Note that if the Tensor is contiguous, then A is equal * to the entire Tensor. Let's call the inner tensor B. * * 2. We loop through the indices in B, starting at its outermost dimension. For * example, if B is a 2x2 matrix, then we do: * * B[0][0] * B[0][1] * B[1][0] * B[1][1] * * We set the offset into the underlying storage as (storageOffset + stride_B * index_B), * i.e. basically we compute the offset into the storage as we would normally for a * Tensor. But because we are guaranteed the subsequent data is contiguous in memory, we * can simply loop for sizeof(A) iterations and perform the operation, without having to * follow the order described by the strides of A. * * 3. As an optimization, we merge dimensions of A that are contiguous in memory. For * example, if A is a 3x3x3x3 tensor narrowed from a 3x3x4x3 tensor, then the first two * dimensions can be merged for the purposes of APPLY, reducing the number of nested * loops. 
*/ #define __TH_TENSOR_APPLYX_PREAMBLE(TYPE, TENSOR, DIM, ALLOW_CONTIGUOUS) \ TYPE *TENSOR##_data = NULL; \ long *TENSOR##_counter = NULL, *TENSOR##_sizes = NULL, *TENSOR##_strides = NULL, *TENSOR##_dimOffset = NULL; \ long TENSOR##_stride = 0, TENSOR##_size = 0, TENSOR##_dim = 0, TENSOR##_i, TENSOR##_n; \ int TENSOR##_contiguous = ALLOW_CONTIGUOUS && DIM < 0; \ TENSOR##_n = (TENSOR->nDimension ? 1 : 0); \ for(TENSOR##_i = 0; TENSOR##_i < TENSOR->nDimension; TENSOR##_i++) \ TENSOR##_n *= TENSOR->size[TENSOR##_i]; \ \ if(TENSOR->nDimension == 0) \ TH_TENSOR_APPLY_hasFinished = 1; \ else \ { \ TENSOR##_data = TENSOR->storage->data+TENSOR->storageOffset; \ TENSOR##_size = 1; \ TENSOR##_stride = 1; \ for(TENSOR##_i = TENSOR->nDimension-1; TENSOR##_i >= 0; TENSOR##_i--) { \ if(TENSOR->size[TENSOR##_i] != 1) { \ if(TENSOR->stride[TENSOR##_i] == TENSOR##_size && TENSOR##_i != DIM) \ TENSOR##_size *= TENSOR->size[TENSOR##_i]; \ else{ \ TENSOR##_contiguous = 0; \ break; \ } \ } \ } \ if (!TENSOR##_contiguous) { \ /* Find the dimension of contiguous sections */ \ TENSOR##_dim = 1; \ for(TENSOR##_i = TENSOR->nDimension-2; TENSOR##_i >= 0; TENSOR##_i--) \ { \ if(TENSOR->stride[TENSOR##_i] != TENSOR->stride[TENSOR##_i+1] * TENSOR->size[TENSOR##_i+1] || TENSOR##_i == DIM || TENSOR##_i+1 == DIM) \ TENSOR##_dim++; \ } \ /* Allocate an array of 3*dim elements, where dim is the number of contiguous sections */ \ TENSOR##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR##_dim)); \ TENSOR##_sizes = TENSOR##_counter + TENSOR##_dim; \ TENSOR##_strides = TENSOR##_counter + 2*TENSOR##_dim; \ TH_TENSOR_dim_index = TENSOR##_dim-1; \ TENSOR##_dimOffset = (DIM == TENSOR->nDimension-1) ? &TENSOR##_i : &TENSOR##_counter[DIM]; \ TENSOR##_sizes[TH_TENSOR_dim_index] = TENSOR->size[TENSOR->nDimension-1]; \ TENSOR##_strides[TH_TENSOR_dim_index] = TENSOR->stride[TENSOR->nDimension-1]; \ /* TENSOR##_counter tracks where we are in the storage. 
The offset into the */ \ /* storage is given by storage_offset + (i * j), where i is the stride */ \ /* vector and j is tensor_counter vector. This sets the starting position for the loop. */ \ for(TENSOR##_i = TENSOR##_dim-1; TENSOR##_i >= 0; --TENSOR##_i) { \ TENSOR##_counter[TENSOR##_i] = 0; \ } \ for(TENSOR##_i = TENSOR->nDimension-2; TENSOR##_i >= 0; --TENSOR##_i) { \ if (TENSOR->stride[TENSOR##_i] == TENSOR->stride[TENSOR##_i+1] * TENSOR->size[TENSOR##_i+1] && TENSOR##_i != DIM && TENSOR##_i+1 != DIM) { \ TENSOR##_sizes[TH_TENSOR_dim_index] = TENSOR->size[TENSOR##_i] * TENSOR##_sizes[TH_TENSOR_dim_index]; \ if (DIM != TENSOR->nDimension-1 && TENSOR##_i < DIM) \ TENSOR##_dimOffset--; \ } else { \ --TH_TENSOR_dim_index; \ TENSOR##_sizes[TH_TENSOR_dim_index] = TENSOR->size[TENSOR##_i]; \ TENSOR##_strides[TH_TENSOR_dim_index] = TENSOR->stride[TENSOR##_i]; \ } \ } \ /* Size of the inner most section */ \ TENSOR##_size = TENSOR##_sizes[TENSOR##_dim-1]; \ /* Stride of the inner most section */ \ TENSOR##_stride = TENSOR##_strides[TENSOR##_dim-1]; \ } \ } \ TENSOR##_i = 0; #define __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR, ALWAYS_UPDATE) \ if(TENSOR##_i == TENSOR##_size || ALWAYS_UPDATE) \ { \ if(TENSOR##_contiguous) \ break; \ \ if(TENSOR##_dim == 1) \ break; \ \ /* Reset pointer to beginning of loop */ \ TENSOR##_data -= TENSOR##_size*TENSOR##_stride; \ for(TENSOR##_i = TENSOR##_dim-2; TENSOR##_i >= 0; TENSOR##_i--) \ { \ TENSOR##_counter[TENSOR##_i]++; \ /* Jump ahread by the stride of this dimension */ \ TENSOR##_data += TENSOR##_strides[TENSOR##_i]; \ \ if(TENSOR##_counter[TENSOR##_i] == TENSOR##_sizes[TENSOR##_i]) \ { \ if(TENSOR##_i == 0) \ { \ TH_TENSOR_APPLY_hasFinished = 1; \ break; \ } \ else \ { \ /* Reset the pointer to the beginning of the chunk defined by this dimension */ \ TENSOR##_data -= TENSOR##_counter[TENSOR##_i]*TENSOR##_strides[TENSOR##_i]; \ TENSOR##_counter[TENSOR##_i] = 0; \ } \ } \ else \ break; \ } \ TENSOR##_i = 0; \ } \ #define 
TH_TENSOR_APPLY3_D(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, DIM, CODE) \ { \ int TH_TENSOR_APPLY_hasFinished = 0; \ long TH_TENSOR_dim_index = 0; \ __TH_TENSOR_APPLYX_PREAMBLE(TYPE1, TENSOR1, DIM, 1) \ __TH_TENSOR_APPLYX_PREAMBLE(TYPE2, TENSOR2, DIM, 1) \ __TH_TENSOR_APPLYX_PREAMBLE(TYPE3, TENSOR3, DIM, 1) \ \ int elements_equal = 1; \ if(TENSOR1##_n != TENSOR2##_n) { \ elements_equal = 0; \ } \ else if(TENSOR1##_n != TENSOR3##_n) { \ elements_equal = 0; \ } \ if (elements_equal == 0) { \ THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \ THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \ THDescBuff T3buff = _THSizeDesc(TENSOR3->size, TENSOR3->nDimension); \ THError("inconsistent tensor size, expected %s %s, %s %s and %s %s to have the same " \ "number of elements, but got %d, %d and %d elements respectively", \ #TENSOR1, T1buff.str, #TENSOR2, T2buff.str, #TENSOR3, T3buff.str, \ TENSOR1##_n, TENSOR2##_n, TENSOR3##_n); \ } \ \ while(!TH_TENSOR_APPLY_hasFinished) \ { \ /* Loop through the inner most region of the Tensor */ \ for(; TENSOR1##_i < TENSOR1##_size && TENSOR2##_i < TENSOR2##_size && TENSOR3##_i < TENSOR3##_size; TENSOR1##_i++, TENSOR2##_i++, TENSOR3##_i++, TENSOR1##_data += TENSOR1##_stride, TENSOR2##_data += TENSOR2##_stride, TENSOR3##_data += TENSOR3##_stride) /* 0 et pas TENSOR##_dim! 
*/ \ { \ CODE \ } \ __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR1, 0) \ __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR2, 0) \ __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR3, 0) \ } \ if(TENSOR1##_counter != NULL) \ THFree(TENSOR1##_counter); \ if(TENSOR2##_counter != NULL) \ THFree(TENSOR2##_counter); \ if(TENSOR3##_counter != NULL) \ THFree(TENSOR3##_counter); \ } #define TH_TENSOR_APPLY3(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE) \ TH_TENSOR_APPLY3_D(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, -1, CODE) #define TH_TENSOR_APPLY2_D(TYPE1, TENSOR1, TYPE2, TENSOR2, DIM, CODE) \ { \ int TH_TENSOR_APPLY_hasFinished = 0; \ long TH_TENSOR_dim_index = 0; \ __TH_TENSOR_APPLYX_PREAMBLE(TYPE1, TENSOR1, DIM, 1) \ __TH_TENSOR_APPLYX_PREAMBLE(TYPE2, TENSOR2, DIM, 1) \ \ if(TENSOR1##_n != TENSOR2##_n) { \ THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \ THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \ THError("inconsistent tensor size, expected %s %s and %s %s to have the same " \ "number of elements, but got %d and %d elements respectively", \ #TENSOR1, T1buff.str, #TENSOR2, T2buff.str, TENSOR1##_n, TENSOR2##_n); \ } \ while(!TH_TENSOR_APPLY_hasFinished) \ { \ /* Loop through the inner most region of the Tensor */ \ for(; TENSOR1##_i < TENSOR1##_size && TENSOR2##_i < TENSOR2##_size; TENSOR1##_i++, TENSOR2##_i++, TENSOR1##_data += TENSOR1##_stride, TENSOR2##_data += TENSOR2##_stride) /* 0 et pas TENSOR##_dim! 
*/ \ { \ CODE \ } \ __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR1, 0) \ __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR2, 0) \ } \ if(TENSOR1##_counter != NULL) \ THFree(TENSOR1##_counter); \ if(TENSOR2##_counter != NULL) \ THFree(TENSOR2##_counter); \ } #define TH_TENSOR_APPLY2(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \ TH_TENSOR_APPLY2_D(TYPE1, TENSOR1, TYPE2, TENSOR2, -1, CODE) #define TH_TENSOR_APPLY_D(TYPE, TENSOR, DIM, CODE) \ { \ int TH_TENSOR_APPLY_hasFinished = 0; \ long TH_TENSOR_dim_index = 0; \ __TH_TENSOR_APPLYX_PREAMBLE(TYPE, TENSOR, DIM, 0) \ \ while(!TH_TENSOR_APPLY_hasFinished) \ { \ /* Loop through the inner most region of the Tensor */ \ for(; TENSOR##_i < TENSOR##_size; TENSOR##_i++, TENSOR##_data += TENSOR##_stride) /* 0 et pas TENSOR##_dim! */ \ { \ CODE \ } \ __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR, 1) \ } \ THFree(TENSOR##_counter); \ } #define TH_TENSOR_APPLY(TYPE, TENSOR, CODE) \ TH_TENSOR_APPLY_D(TYPE, TENSOR, -1, CODE) #endif lib/TH/THTensorDimApply.h000066400000000000000000000320521316246254300154660ustar00rootroot00000000000000#ifndef TH_TENSOR_DIM_APPLY_INC #define TH_TENSOR_DIM_APPLY_INC #define TH_TENSOR_DIM_APPLY3(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, DIMENSION, CODE) \ { \ TYPE1 *TENSOR1##_data = NULL; \ long TENSOR1##_stride = 0, TENSOR1##_size = 0; \ TYPE2 *TENSOR2##_data = NULL; \ long TENSOR2##_stride = 0, TENSOR2##_size = 0; \ TYPE3 *TENSOR3##_data = NULL; \ long TENSOR3##_stride = 0, TENSOR3##_size = 0; \ long *TH_TENSOR_DIM_APPLY_counter = NULL; \ int TH_TENSOR_DIM_APPLY_hasFinished = 0; \ int TH_TENSOR_DIM_APPLY_i; \ \ if( (DIMENSION < 0) || (DIMENSION >= TENSOR1->nDimension) ) \ THError("invalid dimension %d (expected to be 0 <= dim < %d)", DIMENSION, TENSOR1->nDimension); \ int same_dims = 1; \ if( TENSOR1->nDimension != TENSOR2->nDimension ) { \ same_dims = 0; \ } \ if( TENSOR1->nDimension != TENSOR3->nDimension ) { \ same_dims = 0; \ } \ if (same_dims == 0) { \ THDescBuff T1buff = _THSizeDesc(TENSOR1->size, 
TENSOR1->nDimension); \ THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \ THDescBuff T3buff = _THSizeDesc(TENSOR3->size, TENSOR3->nDimension); \ THError("inconsistent tensor size, expected %s %s, %s %s and %s %s to have the same " \ "number of dimensions", #TENSOR1, T1buff.str, #TENSOR2, T2buff.str, #TENSOR3, T3buff.str); \ } \ int shape_check_flag = 0; \ for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \ { \ if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \ continue; \ if(TENSOR1->size[TH_TENSOR_DIM_APPLY_i] != TENSOR2->size[TH_TENSOR_DIM_APPLY_i]) \ shape_check_flag = 1; \ if(TENSOR1->size[TH_TENSOR_DIM_APPLY_i] != TENSOR3->size[TH_TENSOR_DIM_APPLY_i]) \ shape_check_flag = 1; \ } \ \ if (shape_check_flag == 1) { \ THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \ THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \ THDescBuff T3buff = _THSizeDesc(TENSOR3->size, TENSOR3->nDimension); \ THError("Expected %s %s, %s %s and %s %s to have the same size in dimension %d", \ #TENSOR1, T1buff.str, #TENSOR2, T2buff.str, #TENSOR3, T3buff.str, DIMENSION); \ } \ \ TH_TENSOR_DIM_APPLY_counter = (long*)THAlloc(sizeof(long)*(TENSOR1->nDimension)); \ for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \ TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \ \ TENSOR1##_data = (TENSOR1)->storage->data+(TENSOR1)->storageOffset; \ TENSOR1##_stride = (TENSOR1)->stride[DIMENSION]; \ TENSOR1##_size = TENSOR1->size[DIMENSION]; \ \ TENSOR2##_data = (TENSOR2)->storage->data+(TENSOR2)->storageOffset; \ TENSOR2##_stride = (TENSOR2)->stride[DIMENSION]; \ TENSOR2##_size = TENSOR2->size[DIMENSION]; \ \ TENSOR3##_data = (TENSOR3)->storage->data+(TENSOR3)->storageOffset; \ TENSOR3##_stride = (TENSOR3)->stride[DIMENSION]; \ TENSOR3##_size = TENSOR3->size[DIMENSION]; \ \ while(!TH_TENSOR_DIM_APPLY_hasFinished) \ { \ CODE \ \ 
if(TENSOR1->nDimension == 1) \ break; \ \ for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \ { \ if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \ { \ if(TH_TENSOR_DIM_APPLY_i == TENSOR1->nDimension-1) \ { \ TH_TENSOR_DIM_APPLY_hasFinished = 1; \ break; \ } \ continue; \ } \ \ TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \ TENSOR1##_data += TENSOR1->stride[TH_TENSOR_DIM_APPLY_i]; \ TENSOR2##_data += TENSOR2->stride[TH_TENSOR_DIM_APPLY_i]; \ TENSOR3##_data += TENSOR3->stride[TH_TENSOR_DIM_APPLY_i]; \ \ if(TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == TENSOR1->size[TH_TENSOR_DIM_APPLY_i]) \ { \ if(TH_TENSOR_DIM_APPLY_i == TENSOR1->nDimension-1) \ { \ TH_TENSOR_DIM_APPLY_hasFinished = 1; \ break; \ } \ else \ { \ TENSOR1##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR1->stride[TH_TENSOR_DIM_APPLY_i]; \ TENSOR2##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR2->stride[TH_TENSOR_DIM_APPLY_i]; \ TENSOR3##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR3->stride[TH_TENSOR_DIM_APPLY_i]; \ TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \ } \ } \ else \ break; \ } \ } \ THFree(TH_TENSOR_DIM_APPLY_counter); \ } /** * Similar to DIM_APPLY(...) but we maintain two sets of pointers: one for the first tensor * and one for the second. The two tensors must have the same shape, other than at the * specified DIMENSION. This function makes it easy to store the output from reducing the * TENSOR at index. For example, in the sum example described below, we could instead do: * * long i = 0; * TYPE1 sum; * * for (i = 0; i < TENSOR1##_size; ++i) { * sum += TENSOR1##_data[i * TENSOR1##_stride] * } * *TENSOR2##_data = (TYPE2) sum; * * In particular, we guarantee that the offset into TENSOR2 will be what you would get if * you applied all of the index values used to generate the offset into TENSOR1. 
*/ #define TH_TENSOR_DIM_APPLY2(TYPE1, TENSOR1, TYPE2, TENSOR2, DIMENSION, CODE) \ { \ TYPE1 *TENSOR1##_data = NULL; \ long TENSOR1##_stride = 0, TENSOR1##_size = 0; \ TYPE2 *TENSOR2##_data = NULL; \ long TENSOR2##_stride = 0, TENSOR2##_size = 0; \ long *TH_TENSOR_DIM_APPLY_counter = NULL; \ int TH_TENSOR_DIM_APPLY_hasFinished = 0; \ int TH_TENSOR_DIM_APPLY_i; \ \ if( (DIMENSION < 0) || (DIMENSION >= TENSOR1->nDimension) ) \ THError("invalid dimension %d (expected to be 0 <= dim < %d)", DIMENSION, TENSOR1->nDimension); \ if( TENSOR1->nDimension != TENSOR2->nDimension ) { \ THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \ THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \ THError("inconsistent tensor size, expected %s %s and %s %s to have the same " \ "number of dimensions", #TENSOR1, T1buff.str, #TENSOR2, T2buff.str); \ } \ int shape_check_flag = 0; \ for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \ { \ if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \ continue; \ if(TENSOR1->size[TH_TENSOR_DIM_APPLY_i] != TENSOR2->size[TH_TENSOR_DIM_APPLY_i]) { \ THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \ THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \ THError("Expected %s %s and %s %s to have the same size in dimension %d", \ #TENSOR1, T1buff.str, #TENSOR2, T2buff.str, DIMENSION); \ } \ } \ \ TH_TENSOR_DIM_APPLY_counter = (long*)THAlloc(sizeof(long)*(TENSOR1->nDimension)); \ for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \ TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \ \ TENSOR1##_data = (TENSOR1)->storage->data+(TENSOR1)->storageOffset; \ TENSOR1##_stride = (TENSOR1)->stride[DIMENSION]; \ TENSOR1##_size = TENSOR1->size[DIMENSION]; \ \ TENSOR2##_data = (TENSOR2)->storage->data+(TENSOR2)->storageOffset; \ TENSOR2##_stride = (TENSOR2)->stride[DIMENSION]; \ TENSOR2##_size = 
TENSOR2->size[DIMENSION]; \ \ while(!TH_TENSOR_DIM_APPLY_hasFinished) \ { \ CODE \ \ if(TENSOR1->nDimension == 1) \ break; \ \ for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \ { \ if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \ { \ if(TH_TENSOR_DIM_APPLY_i == TENSOR1->nDimension-1) \ { \ TH_TENSOR_DIM_APPLY_hasFinished = 1; \ break; \ } \ continue; \ } \ \ TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \ TENSOR1##_data += TENSOR1->stride[TH_TENSOR_DIM_APPLY_i]; \ TENSOR2##_data += TENSOR2->stride[TH_TENSOR_DIM_APPLY_i]; \ \ if(TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == TENSOR1->size[TH_TENSOR_DIM_APPLY_i]) \ { \ if(TH_TENSOR_DIM_APPLY_i == TENSOR1->nDimension-1) \ { \ TH_TENSOR_DIM_APPLY_hasFinished = 1; \ break; \ } \ else \ { \ TENSOR1##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR1->stride[TH_TENSOR_DIM_APPLY_i]; \ TENSOR2##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR2->stride[TH_TENSOR_DIM_APPLY_i]; \ TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \ } \ } \ else \ break; \ } \ } \ THFree(TH_TENSOR_DIM_APPLY_counter); \ } /** * The basic idea for DIM_APPLY: Given a TENSOR and a DIMENSION, provide access to the data stored * at all sets of dimension values other than DIMENSION, such that we can get all the values at those * fixed indices for the various values at DIMENSION. * * Suppose we have a 2x3x4 Tensor A, and we have DIMENSION=2. Then we will hit CODE (2x3) times, and the * pointer into storage will be at: * * A[0][0] * A[0][1] * A[0][2] * A[1][0] * A[1][1] * A[1][2] * * And at each point, we can access the data for each of the four elements of the Tensor via * TENSOR##_stride. So for example, if we wanted to sum the elements there, we could do: * * long i = 0; * TYPE sum; * for (i = 0; i < TENSOR##_size; i++) { * sum += TENSOR##_data[i * TENSOR##_stride] * } * * Note that we don't have to have DIMENSION be the last tensor. 
If we have DIMENSION=1, then we will hit the * code (2x4) times, with pointer into the storage at: * * offset + * stride_0 * 0 + stride_2 * 0 * stride_0 * 1 + stride_2 * 0 * stride_0 * 0 + stride_2 * 1 * stride_0 * 1 + stride_2 * 1 * stride_0 * 0 + stride_2 * 2 * stride_0 * 1 + stride_2 * 2 * stride_0 * 0 + stride_2 * 3 * stride_0 * 1 + stride_2 * 3 * * So we can again sum over the values at DIMENSION with the other indices fixed. */ #define TH_TENSOR_DIM_APPLY(TYPE, TENSOR, DIMENSION, CODE) \ { \ TYPE *TENSOR##_data = NULL; \ long TENSOR##_stride = 0, TENSOR##_size = 0; \ long *TH_TENSOR_DIM_APPLY_counter = NULL; \ int TH_TENSOR_DIM_APPLY_hasFinished = 0; \ int TH_TENSOR_DIM_APPLY_i; \ \ if( (DIMENSION < 0) || (DIMENSION >= TENSOR->nDimension) ) \ THError("invalid dimension"); \ \ TENSOR##_data = (TENSOR)->storage->data+(TENSOR)->storageOffset; \ TENSOR##_stride = (TENSOR)->stride[DIMENSION]; \ TENSOR##_size = TENSOR->size[DIMENSION]; \ /* Counter stores the indices into the Tensor at any time */ \ TH_TENSOR_DIM_APPLY_counter = (long*)THAlloc(sizeof(long)*(TENSOR->nDimension)); \ for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR->nDimension; TH_TENSOR_DIM_APPLY_i++) \ TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \ \ while(!TH_TENSOR_DIM_APPLY_hasFinished) \ { \ CODE \ \ if(TENSOR->nDimension == 1) \ break; \ \ for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR->nDimension; TH_TENSOR_DIM_APPLY_i++) \ { \ /* Check if the index is equal to DIMENSION. We don't need to update the */ \ /* offset if this is the case, and can consider the next index. 
However, */ \ /* in the case that the DIMENSION is the last index in the Tensor, then */ \ /* we have parsed the entire tensor and can exit */ \ if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \ { \ if(TH_TENSOR_DIM_APPLY_i == TENSOR->nDimension-1) \ { \ TH_TENSOR_DIM_APPLY_hasFinished = 1; \ break; \ } \ continue; \ } \ \ /* Bump the counter at this index, update the pointer */ \ TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \ TENSOR##_data += TENSOR->stride[TH_TENSOR_DIM_APPLY_i]; \ \ if(TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == TENSOR->size[TH_TENSOR_DIM_APPLY_i]) \ { \ /* Handled TENSOR_size(dim) iterations for DIM_APPLY_i. If this is the last dimension, exit */ \ if(TH_TENSOR_DIM_APPLY_i == TENSOR->nDimension-1) \ { \ TH_TENSOR_DIM_APPLY_hasFinished = 1; \ break; \ } \ else \ { \ /* Reset the counter, and the pointer to the beginning of the storage for this combination of indices */ \ TENSOR##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR->stride[TH_TENSOR_DIM_APPLY_i]; \ TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \ } \ } \ else \ break; \ } \ } \ THFree(TH_TENSOR_DIM_APPLY_counter); \ } #endif lib/TH/THTensorMacros.h000066400000000000000000000031121316246254300151660ustar00rootroot00000000000000#ifndef TH_TENSOR_MACROS_INC #define TH_TENSOR_MACROS_INC /* fast method to access to tensor data */ #define THTensor_fastGet1d(self, x0) \ (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]]) #define THTensor_fastGet2d(self, x0, x1) \ (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]]) #define THTensor_fastGet3d(self, x0, x1, x2) \ (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]+(x2)*(self)->stride[2]]) #define THTensor_fastGet4d(self, x0, x1, x2, x3) \ (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]+(x2)*(self)->stride[2]+(x3)*(self)->stride[3]]) #define 
THTensor_fastSet1d(self, x0, value) \ (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]] = value) #define THTensor_fastSet2d(self, x0, x1, value) \ (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]] = value) #define THTensor_fastSet3d(self, x0, x1, x2, value) \ (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]+(x2)*(self)->stride[2]] = value) #define THTensor_fastSet4d(self, x0, x1, x2, x3, value) \ (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]+(x2)*(self)->stride[2]+(x3)*(self)->stride[3]] = value) #endif lib/TH/THVector.c000066400000000000000000000010521316246254300140050ustar00rootroot00000000000000#include "THVector.h" #include "generic/simd/simd.h" #ifdef __NEON__ #include "vector/NEON.c" #endif #ifdef __PPC64__ #include "vector/VSX.c" #endif #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \ || defined(USE_SSE4_1) || defined(USE_SSE4_2) #include "vector/SSE.c" #endif #if defined(USE_AVX) #include "vector/AVX.h" #endif #if defined(USE_AVX2) #include "vector/AVX2.h" #endif #include "generic/THVectorDefault.c" #include "THGenerateAllTypes.h" #include "generic/THVectorDispatch.c" #include "THGenerateAllTypes.h" lib/TH/THVector.h000066400000000000000000000005051316246254300140140ustar00rootroot00000000000000#ifndef TH_VECTOR_INC #define TH_VECTOR_INC #include "THGeneral.h" #define THVector_(NAME) TH_CONCAT_4(TH,Real,Vector_,NAME) /* We are going to use dynamic dispatch, and want only to generate declarations * of the vector functions */ #include "generic/THVector.h" #include "THGenerateAllTypes.h" #endif // TH_VECTOR_INC lib/TH/cmake/000077500000000000000000000000001316246254300132255ustar00rootroot00000000000000lib/TH/cmake/FindARM.cmake000066400000000000000000000062661316246254300154610ustar00rootroot00000000000000# Check if the processor is an ARM and if Neon instruction are available on the 
machine where # the project is compiled. IF(CMAKE_SYSTEM_NAME MATCHES "Linux") EXEC_PROGRAM(cat ARGS "/proc/cpuinfo" OUTPUT_VARIABLE CPUINFO) #neon instruction can be found on the majority part of modern ARM processor STRING(REGEX REPLACE "^.*(neon).*$" "\\1" NEON_THERE ${CPUINFO}) STRING(COMPARE EQUAL "neon" "${NEON_THERE}" NEON_TRUE) IF (NEON_TRUE) set(NEON_FOUND true CACHE BOOL "NEON available on host") ELSE (NEON_TRUE) set(NEON_FOUND false CACHE BOOL "NEON available on host") ENDIF (NEON_TRUE) # on ARMv8, neon is inherit and instead listed as 'asimd' in /proc/cpuinfo STRING(REGEX REPLACE "^.*(asimd).*$" "\\1" ASIMD_THERE ${CPUINFO}) STRING(COMPARE EQUAL "asimd" "${ASIMD_THERE}" ASIMD_TRUE) IF (ASIMD_TRUE) set(ASIMD_FOUND true CACHE BOOL "ASIMD/NEON available on host") ELSE (ASIMD_TRUE) set(ASIMD_FOUND false CACHE BOOL "ASIMD/NEON available on host") ENDIF (ASIMD_TRUE) #Find the processor type (for now OMAP3 or OMAP4) STRING(REGEX REPLACE "^.*(OMAP3).*$" "\\1" OMAP3_THERE ${CPUINFO}) STRING(COMPARE EQUAL "OMAP3" "${OMAP3_THERE}" OMAP3_TRUE) IF (OMAP3_TRUE) set(CORTEXA8_FOUND true CACHE BOOL "OMAP3 available on host") ELSE (OMAP3_TRUE) set(CORTEXA8_FOUND false CACHE BOOL "OMAP3 available on host") ENDIF (OMAP3_TRUE) #Find the processor type (for now OMAP3 or OMAP4) STRING(REGEX REPLACE "^.*(OMAP4).*$" "\\1" OMAP4_THERE ${CPUINFO}) STRING(COMPARE EQUAL "OMAP4" "${OMAP4_THERE}" OMAP4_TRUE) IF (OMAP4_TRUE) set(CORTEXA9_FOUND true CACHE BOOL "OMAP4 available on host") ELSE (OMAP4_TRUE) set(CORTEXA9_FOUND false CACHE BOOL "OMAP4 available on host") ENDIF (OMAP4_TRUE) ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin") EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.features" OUTPUT_VARIABLE CPUINFO) #neon instruction can be found on the majority part of modern ARM processor STRING(REGEX REPLACE "^.*(neon).*$" "\\1" NEON_THERE ${CPUINFO}) STRING(COMPARE EQUAL "neon" "${NEON_THERE}" NEON_TRUE) IF (NEON_TRUE) set(NEON_FOUND true CACHE BOOL "NEON available on host") ELSE (NEON_TRUE) 
set(NEON_FOUND false CACHE BOOL "NEON available on host") ENDIF (NEON_TRUE) ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows") # TODO set(CORTEXA8_FOUND false CACHE BOOL "OMAP3 not available on host") set(CORTEXA9_FOUND false CACHE BOOL "OMAP4 not available on host") set(NEON_FOUND false CACHE BOOL "NEON not available on host") ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux") set(CORTEXA8_FOUND false CACHE BOOL "OMAP3 not available on host") set(CORTEXA9_FOUND false CACHE BOOL "OMAP4 not available on host") set(NEON_FOUND false CACHE BOOL "NEON not available on host") ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux") if(NOT NEON_FOUND) MESSAGE(STATUS "Could not find hardware support for NEON on this machine.") endif(NOT NEON_FOUND) if(NOT CORTEXA8_FOUND) MESSAGE(STATUS "No OMAP3 processor on this machine.") endif(NOT CORTEXA8_FOUND) if(NOT CORTEXA9_FOUND) MESSAGE(STATUS "No OMAP4 processor on this machine.") endif(NOT CORTEXA9_FOUND) mark_as_advanced(NEON_FOUND) lib/TH/cmake/FindBLAS.cmake000066400000000000000000000211711316246254300155530ustar00rootroot00000000000000# - Find BLAS library # This module finds an installed fortran library that implements the BLAS # linear-algebra interface (see http://www.netlib.org/blas/). # The list of libraries searched for is taken # from the autoconf macro file, acx_blas.m4 (distributed at # http://ac-archive.sourceforge.net/ac-archive/acx_blas.html). # # This module sets the following variables: # BLAS_FOUND - set to true if a library implementing the BLAS interface is found. # BLAS_INFO - name of the detected BLAS library. 
# BLAS_F2C - set to true if following the f2c return convention # BLAS_LIBRARIES - list of libraries to link against to use BLAS # BLAS_INCLUDE_DIR - include directory # Do nothing is BLAS was found before IF(NOT BLAS_FOUND) SET(BLAS_LIBRARIES) SET(BLAS_INCLUDE_DIR) SET(BLAS_INFO) SET(BLAS_F2C) SET(WITH_BLAS "" CACHE STRING "Blas type [mkl/open/goto/acml/atlas/accelerate/veclib/generic]") # Old FindBlas INCLUDE(CheckCSourceRuns) INCLUDE(CheckFortranFunctionExists) MACRO(Check_Fortran_Libraries LIBRARIES _prefix _name _flags _list) # This macro checks for the existence of the combination of fortran libraries # given by _list. If the combination is found, this macro checks (using the # Check_Fortran_Function_Exists macro) whether can link against that library # combination using the name of a routine given by _name using the linker # flags given by _flags. If the combination of libraries is found and passes # the link test, LIBRARIES is set to the list of complete library paths that # have been found. Otherwise, LIBRARIES is set to NOTFOUND. # N.B. _prefix is the prefix applied to the names of all cached variables that # are generated internally and marked advanced by this macro. 
set(__list) foreach(_elem ${_list}) if(__list) set(__list "${__list} - ${_elem}") else(__list) set(__list "${_elem}") endif(__list) endforeach(_elem) message(STATUS "Checking for [${__list}]") set(_libraries_work TRUE) set(${LIBRARIES}) set(_combined_name) foreach(_library ${_list}) set(_combined_name ${_combined_name}_${_library}) if(_libraries_work) if ( WIN32 ) find_library(${_prefix}_${_library}_LIBRARY NAMES ${_library} PATHS ENV LIB PATHS ENV PATH ) endif ( WIN32 ) if ( APPLE ) find_library(${_prefix}_${_library}_LIBRARY NAMES ${_library} PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV DYLD_LIBRARY_PATH ) else ( APPLE ) find_library(${_prefix}_${_library}_LIBRARY NAMES ${_library} PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV LD_LIBRARY_PATH ) endif( APPLE ) mark_as_advanced(${_prefix}_${_library}_LIBRARY) set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY}) set(_libraries_work ${${_prefix}_${_library}_LIBRARY}) MESSAGE(STATUS " Library ${_library}: ${${_prefix}_${_library}_LIBRARY}") endif(_libraries_work) endforeach(_library ${_list}) if(_libraries_work) # Test this combination of libraries. set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}}) if (CMAKE_Fortran_COMPILER_WORKS) check_fortran_function_exists(${_name} ${_prefix}${_combined_name}_WORKS) else (CMAKE_Fortran_COMPILER_WORKS) check_function_exists("${_name}_" ${_prefix}${_combined_name}_WORKS) endif (CMAKE_Fortran_COMPILER_WORKS) set(CMAKE_REQUIRED_LIBRARIES) mark_as_advanced(${_prefix}${_combined_name}_WORKS) set(_libraries_work ${${_prefix}${_combined_name}_WORKS}) endif(_libraries_work) if(NOT _libraries_work) set(${LIBRARIES} NOTFOUND) endif(NOT _libraries_work) endmacro(Check_Fortran_Libraries) # Intel MKL? 
if((NOT BLAS_LIBRARIES) AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "mkl"))) FIND_PACKAGE(MKL) IF(MKL_FOUND) SET(BLAS_INFO "mkl") SET(BLAS_LIBRARIES ${MKL_LIBRARIES}) SET(BLAS_INCLUDE_DIR ${MKL_INCLUDE_DIR}) SET(BLAS_VERSION ${MKL_VERSION}) ENDIF(MKL_FOUND) endif() if((NOT BLAS_LIBRARIES) AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "open"))) check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" "openblas") if(BLAS_LIBRARIES) set(BLAS_INFO "open") endif(BLAS_LIBRARIES) endif() if((NOT BLAS_LIBRARIES) AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "open"))) check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" "openblas;pthread") if(BLAS_LIBRARIES) set(BLAS_INFO "open") endif(BLAS_LIBRARIES) endif() if((NOT BLAS_LIBRARIES) AND (WIN32) AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "open"))) check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" "libopenblas") if(BLAS_LIBRARIES) set(BLAS_INFO "open") endif(BLAS_LIBRARIES) endif() if((NOT BLAS_LIBRARIES) AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "goto"))) check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" "goto2;gfortran") if (BLAS_LIBRARIES) set(BLAS_INFO "goto") endif (BLAS_LIBRARIES) endif() if((NOT BLAS_LIBRARIES) AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "goto"))) check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" "goto2;gfortran;pthread") if (BLAS_LIBRARIES) set(BLAS_INFO "goto") endif (BLAS_LIBRARIES) endif() if((NOT BLAS_LIBRARIES) AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "acml"))) check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" "acml;gfortran") if (BLAS_LIBRARIES) set(BLAS_INFO "acml") endif (BLAS_LIBRARIES) endif() # Apple BLAS library? 
if((NOT BLAS_LIBRARIES) AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "accelerate"))) check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" "Accelerate") if (BLAS_LIBRARIES) set(BLAS_INFO "accelerate") set(BLAS_IS_ACCELERATE 1) endif (BLAS_LIBRARIES) endif() if((NOT BLAS_LIBRARIES) AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "veclib"))) check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" "vecLib") if (BLAS_LIBRARIES) set(BLAS_INFO "veclib") endif (BLAS_LIBRARIES) endif() # BLAS in ATLAS library? (http://math-atlas.sourceforge.net/) if((NOT BLAS_LIBRARIES) AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "atlas"))) check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" "ptf77blas;atlas;gfortran") if (BLAS_LIBRARIES) set(BLAS_INFO "atlas") endif (BLAS_LIBRARIES) endif() # Generic BLAS library? if((NOT BLAS_LIBRARIES) AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "generic"))) check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" "blas") if (BLAS_LIBRARIES) set(BLAS_INFO "generic") endif (BLAS_LIBRARIES) endif() # Determine if blas was compiled with the f2c conventions IF (BLAS_LIBRARIES) SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES}) CHECK_C_SOURCE_RUNS(" #include #include float x[4] = { 1, 2, 3, 4 }; float y[4] = { .1, .01, .001, .0001 }; int four = 4; int one = 1; extern double sdot_(); int main() { int i; double r = sdot_(&four, x, &one, y, &one); exit((float)r != (float).1234); }" BLAS_F2C_DOUBLE_WORKS ) CHECK_C_SOURCE_RUNS(" #include #include float x[4] = { 1, 2, 3, 4 }; float y[4] = { .1, .01, .001, .0001 }; int four = 4; int one = 1; extern float sdot_(); int main() { int i; double r = sdot_(&four, x, &one, y, &one); exit((float)r != (float).1234); }" BLAS_F2C_FLOAT_WORKS ) IF (BLAS_F2C_DOUBLE_WORKS AND NOT BLAS_F2C_FLOAT_WORKS) MESSAGE(STATUS "This BLAS uses the F2C return conventions") SET(BLAS_F2C TRUE) ELSE (BLAS_F2C_DOUBLE_WORKS AND NOT BLAS_F2C_FLOAT_WORKS) SET(BLAS_F2C FALSE) ENDIF (BLAS_F2C_DOUBLE_WORKS AND NOT BLAS_F2C_FLOAT_WORKS) CHECK_C_SOURCE_RUNS(" 
#include #include float x[4] = { 1, 2, 3, 4 }; float y[4] = { .1, .01, .001, .0001 }; extern float cblas_sdot(); int main() { int i; double r = cblas_sdot(4, x, 1, y, 1); exit((float)r != (float).1234); }" BLAS_USE_CBLAS_DOT ) IF (BLAS_USE_CBLAS_DOT) SET(BLAS_USE_CBLAS_DOT TRUE) ELSE (BLAS_USE_CBLAS_DOT) SET(BLAS_USE_CBLAS_DOT FALSE) ENDIF (BLAS_USE_CBLAS_DOT) ENDIF(BLAS_LIBRARIES) # epilogue if(BLAS_LIBRARIES) set(BLAS_FOUND TRUE) else(BLAS_LIBRARIES) set(BLAS_FOUND FALSE) endif(BLAS_LIBRARIES) IF (NOT BLAS_FOUND AND BLAS_FIND_REQUIRED) message(FATAL_ERROR "Cannot find a library with BLAS API. Please specify library location.") ENDIF (NOT BLAS_FOUND AND BLAS_FIND_REQUIRED) IF(NOT BLAS_FIND_QUIETLY) IF(BLAS_FOUND) MESSAGE(STATUS "Found a library with BLAS API (${BLAS_INFO}).") ELSE(BLAS_FOUND) MESSAGE(STATUS "Cannot find a library with BLAS API. Not using BLAS.") ENDIF(BLAS_FOUND) ENDIF(NOT BLAS_FIND_QUIETLY) # Do nothing is BLAS was found before ENDIF(NOT BLAS_FOUND) lib/TH/cmake/FindLAPACK.cmake000066400000000000000000000151441316246254300157700ustar00rootroot00000000000000# - Find LAPACK library # This module finds an installed fortran library that implements the LAPACK # linear-algebra interface (see http://www.netlib.org/lapack/). # # The approach follows that taken for the autoconf macro file, acx_lapack.m4 # (distributed at http://ac-archive.sourceforge.net/ac-archive/acx_lapack.html). 
# # This module sets the following variables: # LAPACK_FOUND - set to true if a library implementing the LAPACK interface is found # LAPACK_LIBRARIES - list of libraries (using full path name) for LAPACK # Note: I do not think it is a good idea to mixup different BLAS/LAPACK versions # Hence, this script wants to find a Lapack library matching your Blas library # Do nothing if LAPACK was found before IF(NOT LAPACK_FOUND) SET(LAPACK_LIBRARIES) SET(LAPACK_INFO) IF(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED) FIND_PACKAGE(BLAS) ELSE(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED) FIND_PACKAGE(BLAS REQUIRED) ENDIF(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED) # Old search lapack script include(CheckFortranFunctionExists) macro(Check_Lapack_Libraries LIBRARIES _prefix _name _flags _list _blas) # This macro checks for the existence of the combination of fortran libraries # given by _list. If the combination is found, this macro checks (using the # Check_Fortran_Function_Exists macro) whether can link against that library # combination using the name of a routine given by _name using the linker # flags given by _flags. If the combination of libraries is found and passes # the link test, LIBRARIES is set to the list of complete library paths that # have been found. Otherwise, LIBRARIES is set to FALSE. # N.B. _prefix is the prefix applied to the names of all cached variables that # are generated internally and marked advanced by this macro. 
set(_libraries_work TRUE) set(${LIBRARIES}) set(_combined_name) foreach(_library ${_list}) set(_combined_name ${_combined_name}_${_library}) if(_libraries_work) if (WIN32) find_library(${_prefix}_${_library}_LIBRARY NAMES ${_library} PATHS ENV LIB PATHS ENV PATH) else (WIN32) if(APPLE) find_library(${_prefix}_${_library}_LIBRARY NAMES ${_library} PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV DYLD_LIBRARY_PATH) else(APPLE) find_library(${_prefix}_${_library}_LIBRARY NAMES ${_library} PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV LD_LIBRARY_PATH) endif(APPLE) endif(WIN32) mark_as_advanced(${_prefix}_${_library}_LIBRARY) set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY}) set(_libraries_work ${${_prefix}_${_library}_LIBRARY}) endif(_libraries_work) endforeach(_library ${_list}) if(_libraries_work) # Test this combination of libraries. set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}} ${_blas}) if (CMAKE_Fortran_COMPILER_WORKS) check_fortran_function_exists(${_name} ${_prefix}${_combined_name}_WORKS) else (CMAKE_Fortran_COMPILER_WORKS) check_function_exists("${_name}_" ${_prefix}${_combined_name}_WORKS) endif (CMAKE_Fortran_COMPILER_WORKS) set(CMAKE_REQUIRED_LIBRARIES) mark_as_advanced(${_prefix}${_combined_name}_WORKS) set(_libraries_work ${${_prefix}${_combined_name}_WORKS}) endif(_libraries_work) if(NOT _libraries_work) set(${LIBRARIES} FALSE) endif(NOT _libraries_work) endmacro(Check_Lapack_Libraries) if(BLAS_FOUND) # Intel MKL IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL "mkl")) IF(MKL_LAPACK_LIBRARIES) SET(LAPACK_LIBRARIES ${MKL_LAPACK_LIBRARIES} ${MKL_LIBRARIES}) ELSE(MKL_LAPACK_LIBRARIES) SET(LAPACK_LIBRARIES ${MKL_LIBRARIES}) ENDIF(MKL_LAPACK_LIBRARIES) SET(LAPACK_INCLUDE_DIR ${MKL_INCLUDE_DIR}) SET(LAPACK_INFO "mkl") ENDIF() # OpenBlas IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL "open")) SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES}) check_function_exists("cheev_" OPEN_LAPACK_WORKS) if(OPEN_LAPACK_WORKS) 
SET(LAPACK_INFO "open") else() message(STATUS "It seems OpenBlas has not been compiled with Lapack support") endif() endif() # GotoBlas IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL "goto")) SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES}) check_function_exists("cheev_" GOTO_LAPACK_WORKS) if(GOTO_LAPACK_WORKS) SET(LAPACK_INFO "goto") else() message(STATUS "It seems GotoBlas has not been compiled with Lapack support") endif() endif() # ACML IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL "acml")) SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES}) check_function_exists("cheev_" ACML_LAPACK_WORKS) if(ACML_LAPACK_WORKS) SET(LAPACK_INFO "acml") else() message(STATUS "Strangely, this ACML library does not support Lapack?!") endif() endif() # Accelerate IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL "accelerate")) SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES}) check_function_exists("cheev_" ACCELERATE_LAPACK_WORKS) if(ACCELERATE_LAPACK_WORKS) SET(LAPACK_INFO "accelerate") else() message(STATUS "Strangely, this Accelerate library does not support Lapack?!") endif() endif() # vecLib IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL "veclib")) SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES}) check_function_exists("cheev_" VECLIB_LAPACK_WORKS) if(VECLIB_LAPACK_WORKS) SET(LAPACK_INFO "veclib") else() message(STATUS "Strangely, this vecLib library does not support Lapack?!") endif() endif() # Generic LAPACK library? IF((NOT LAPACK_INFO) AND ((BLAS_INFO STREQUAL "generic") OR (BLAS_INFO STREQUAL "open"))) check_lapack_libraries( LAPACK_LIBRARIES LAPACK cheev "" "lapack" "${BLAS_LIBRARIES}" ) if(LAPACK_LIBRARIES) SET(LAPACK_INFO "generic") endif(LAPACK_LIBRARIES) endif() else(BLAS_FOUND) message(STATUS "LAPACK requires BLAS") endif(BLAS_FOUND) if(LAPACK_INFO) set(LAPACK_FOUND TRUE) else(LAPACK_INFO) set(LAPACK_FOUND FALSE) endif(LAPACK_INFO) IF (NOT LAPACK_FOUND AND LAPACK_FIND_REQUIRED) message(FATAL_ERROR "Cannot find a library with LAPACK API. 
Please specify library location.") ENDIF (NOT LAPACK_FOUND AND LAPACK_FIND_REQUIRED) IF(NOT LAPACK_FIND_QUIETLY) IF(LAPACK_FOUND) MESSAGE(STATUS "Found a library with LAPACK API. (${LAPACK_INFO})") ELSE(LAPACK_FOUND) MESSAGE(STATUS "Cannot find a library with LAPACK API. Not using LAPACK.") ENDIF(LAPACK_FOUND) ENDIF(NOT LAPACK_FIND_QUIETLY) # Do nothing if LAPACK was found before ENDIF(NOT LAPACK_FOUND) lib/TH/cmake/FindMKL.cmake000066400000000000000000000225461316246254300154640ustar00rootroot00000000000000# - Find INTEL MKL library # # This module finds the Intel Mkl libraries. # # This module sets the following variables: # MKL_FOUND - set to true if a library implementing the CBLAS interface is found # MKL_VERSION - best guess # MKL_INCLUDE_DIR - path to include dir. # MKL_LIBRARIES - list of libraries for base mkl # MKL_LAPACK_LIBRARIES - list of libraries to add for lapack # MKL_SCALAPACK_LIBRARIES - list of libraries to add for scalapack # MKL_SOLVER_LIBRARIES - list of libraries to add for the solvers # MKL_CDFT_LIBRARIES - list of libraries to add for the solvers # Do nothing if MKL_FOUND was set before! 
IF (NOT MKL_FOUND) SET(MKL_VERSION) SET(MKL_INCLUDE_DIR) SET(MKL_LIBRARIES) SET(MKL_LAPACK_LIBRARIES) SET(MKL_SCALAPACK_LIBRARIES) SET(MKL_SOLVER_LIBRARIES) SET(MKL_CDFT_LIBRARIES) # Includes INCLUDE(CheckTypeSize) INCLUDE(CheckFunctionExists) # Intel Compiler Suite SET(INTEL_COMPILER_DIR CACHE STRING "Root directory of the Intel Compiler Suite (contains ipp, mkl, etc.)") SET(INTEL_MKL_DIR CACHE STRING "Root directory of the Intel MKL (standalone)") SET(INTEL_MKL_SEQUENTIAL OFF CACHE BOOL "Force using the sequential (non threaded) libraries") # Checks CHECK_TYPE_SIZE("void*" SIZE_OF_VOIDP) IF ("${SIZE_OF_VOIDP}" EQUAL 8) SET(mklvers "em64t") SET(iccvers "intel64") SET(mkl64s "_lp64") ELSE ("${SIZE_OF_VOIDP}" EQUAL 8) SET(mklvers "32") SET(iccvers "ia32") SET(mkl64s) ENDIF ("${SIZE_OF_VOIDP}" EQUAL 8) IF(CMAKE_COMPILER_IS_GNUCC) SET(mklthreads "mkl_gnu_thread" "mkl_intel_thread") SET(mklifaces "gf" "intel") SET(mklrtls "gomp" "iomp5") ELSE(CMAKE_COMPILER_IS_GNUCC) SET(mklthreads "mkl_intel_thread") SET(mklifaces "intel") SET(mklrtls "iomp5" "guide") IF (MSVC) SET(mklrtls "libiomp5md") ENDIF (MSVC) ENDIF (CMAKE_COMPILER_IS_GNUCC) # Kernel libraries dynamically loaded SET(mklkerlibs "mc" "mc3" "nc" "p4n" "p4m" "p4m3" "p4p" "def") SET(mklseq) # Paths SET(saved_CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH}) SET(saved_CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH}) IF (INTEL_COMPILER_DIR) # TODO: diagnostic if dir does not exist SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} "${INTEL_COMPILER_DIR}/lib/${iccvers}") IF (NOT INTEL_MKL_DIR) SET(INTEL_MKL_DIR "${INTEL_COMPILER_DIR}/mkl") ENDIF (NOT INTEL_MKL_DIR) ENDIF (INTEL_COMPILER_DIR) IF (INTEL_MKL_DIR) # TODO: diagnostic if dir does not exist SET(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH} "${INTEL_MKL_DIR}/include") SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} "${INTEL_MKL_DIR}/lib/${mklvers}") IF (MSVC) SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} "${INTEL_MKL_DIR}/lib/${iccvers}") ENDIF (MSVC) ENDIF (INTEL_MKL_DIR) # Try linking 
multiple libs MACRO(CHECK_ALL_LIBRARIES LIBRARIES _name _list _flags) # This macro checks for the existence of the combination of libraries given by _list. # If the combination is found, this macro checks whether we can link against that library # combination using the name of a routine given by _name using the linker # flags given by _flags. If the combination of libraries is found and passes # the link test, LIBRARIES is set to the list of complete library paths that # have been found. Otherwise, LIBRARIES is set to FALSE. # N.B. _prefix is the prefix applied to the names of all cached variables that # are generated internally and marked advanced by this macro. SET(_prefix "${LIBRARIES}") # start checking SET(_libraries_work TRUE) SET(${LIBRARIES}) SET(_combined_name) SET(_paths) set(__list) foreach(_elem ${_list}) if(__list) set(__list "${__list} - ${_elem}") else(__list) set(__list "${_elem}") endif(__list) endforeach(_elem) message(STATUS "Checking for [${__list}]") FOREACH(_library ${_list}) SET(_combined_name ${_combined_name}_${_library}) IF(_libraries_work) IF(${_library} STREQUAL "gomp") FIND_PACKAGE(OpenMP) IF(OPENMP_FOUND) SET(${_prefix}_${_library}_LIBRARY ${OpenMP_C_FLAGS}) ENDIF(OPENMP_FOUND) ELSE(${_library} STREQUAL "gomp") FIND_LIBRARY(${_prefix}_${_library}_LIBRARY NAMES ${_library}) ENDIF(${_library} STREQUAL "gomp") MARK_AS_ADVANCED(${_prefix}_${_library}_LIBRARY) SET(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY}) SET(_libraries_work ${${_prefix}_${_library}_LIBRARY}) IF(${_prefix}_${_library}_LIBRARY) MESSAGE(STATUS " Library ${_library}: ${${_prefix}_${_library}_LIBRARY}") ELSE(${_prefix}_${_library}_LIBRARY) MESSAGE(STATUS " Library ${_library}: not found") ENDIF(${_prefix}_${_library}_LIBRARY) ENDIF(_libraries_work) ENDFOREACH(_library ${_list}) # Test this combination of libraries. 
IF(_libraries_work) SET(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}}) SET(CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES};${CMAKE_REQUIRED_LIBRARIES}") CHECK_FUNCTION_EXISTS(${_name} ${_prefix}${_combined_name}_WORKS) SET(CMAKE_REQUIRED_LIBRARIES) MARK_AS_ADVANCED(${_prefix}${_combined_name}_WORKS) SET(_libraries_work ${${_prefix}${_combined_name}_WORKS}) ENDIF(_libraries_work) # Fin IF(_libraries_work) ELSE (_libraries_work) SET(${LIBRARIES}) MARK_AS_ADVANCED(${LIBRARIES}) ENDIF(_libraries_work) ENDMACRO(CHECK_ALL_LIBRARIES) if(WIN32) set(mkl_m "") else(WIN32) set(mkl_m "m") endif(WIN32) if(UNIX AND NOT APPLE) set(mkl_dl "${CMAKE_DL_LIBS}") else(UNIX AND NOT APPLE) set(mkl_dl "") endif(UNIX AND NOT APPLE) # Check for version 10/11 IF (NOT MKL_LIBRARIES) SET(MKL_VERSION 1011) ENDIF (NOT MKL_LIBRARIES) FOREACH(mklrtl ${mklrtls} "") FOREACH(mkliface ${mklifaces}) FOREACH(mkl64 ${mkl64s} "") FOREACH(mklthread ${mklthreads}) IF (NOT MKL_LIBRARIES AND NOT INTEL_MKL_SEQUENTIAL) CHECK_ALL_LIBRARIES(MKL_LIBRARIES cblas_sgemm "mkl_${mkliface}${mkl64};${mklthread};mkl_core;${mklrtl};pthread;${mkl_m};${mkl_dl}" "") ENDIF (NOT MKL_LIBRARIES AND NOT INTEL_MKL_SEQUENTIAL) ENDFOREACH(mklthread) ENDFOREACH(mkl64) ENDFOREACH(mkliface) ENDFOREACH(mklrtl) FOREACH(mklrtl ${mklrtls} "") FOREACH(mkliface ${mklifaces}) FOREACH(mkl64 ${mkl64s} "") IF (NOT MKL_LIBRARIES) CHECK_ALL_LIBRARIES(MKL_LIBRARIES cblas_sgemm "mkl_${mkliface}${mkl64};mkl_sequential;mkl_core;${mkl_m};${mkl_dl}" "") IF (MKL_LIBRARIES) SET(mklseq "_sequential") ENDIF (MKL_LIBRARIES) ENDIF (NOT MKL_LIBRARIES) ENDFOREACH(mkl64) ENDFOREACH(mkliface) ENDFOREACH(mklrtl) FOREACH(mklrtl ${mklrtls} "") FOREACH(mkliface ${mklifaces}) FOREACH(mkl64 ${mkl64s} "") FOREACH(mklthread ${mklthreads}) IF (NOT MKL_LIBRARIES) CHECK_ALL_LIBRARIES(MKL_LIBRARIES cblas_sgemm "mkl_${mkliface}${mkl64};${mklthread};mkl_core;${mklrtl};pthread;${mkl_m};${mkl_dl}" "") ENDIF (NOT MKL_LIBRARIES) ENDFOREACH(mklthread) ENDFOREACH(mkl64) 
ENDFOREACH(mkliface) ENDFOREACH(mklrtl) # Check for older versions IF (NOT MKL_LIBRARIES) SET(MKL_VERSION 900) CHECK_ALL_LIBRARIES(MKL_LIBRARIES cblas_sgemm "mkl;guide;pthread;m" "") ENDIF (NOT MKL_LIBRARIES) # Include files IF (MKL_LIBRARIES) FIND_PATH(MKL_INCLUDE_DIR "mkl_cblas.h") MARK_AS_ADVANCED(MKL_INCLUDE_DIR) ENDIF (MKL_LIBRARIES) # Other libraries IF (MKL_LIBRARIES) FOREACH(mkl64 ${mkl64s} "_core" "") FOREACH(mkls ${mklseq} "") IF (NOT MKL_LAPACK_LIBRARIES) FIND_LIBRARY(MKL_LAPACK_LIBRARIES NAMES "mkl_lapack${mkl64}${mkls}") MARK_AS_ADVANCED(MKL_LAPACK_LIBRARIES) ENDIF (NOT MKL_LAPACK_LIBRARIES) IF (NOT MKL_SCALAPACK_LIBRARIES) FIND_LIBRARY(MKL_SCALAPACK_LIBRARIES NAMES "mkl_scalapack${mkl64}${mkls}") MARK_AS_ADVANCED(MKL_SCALAPACK_LIBRARIES) ENDIF (NOT MKL_SCALAPACK_LIBRARIES) IF (NOT MKL_SOLVER_LIBRARIES) FIND_LIBRARY(MKL_SOLVER_LIBRARIES NAMES "mkl_solver${mkl64}${mkls}") MARK_AS_ADVANCED(MKL_SOLVER_LIBRARIES) ENDIF (NOT MKL_SOLVER_LIBRARIES) IF (NOT MKL_CDFT_LIBRARIES) FIND_LIBRARY(MKL_CDFT_LIBRARIES NAMES "mkl_cdft${mkl64}${mkls}") MARK_AS_ADVANCED(MKL_CDFT_LIBRARIES) ENDIF (NOT MKL_CDFT_LIBRARIES) ENDFOREACH(mkls) ENDFOREACH(mkl64) ENDIF (MKL_LIBRARIES) # LibIRC: intel compiler always links this; # gcc does not; but mkl kernels sometimes need it. 
IF (MKL_LIBRARIES) IF (CMAKE_COMPILER_IS_GNUCC) FIND_LIBRARY(MKL_KERNEL_libirc "irc") ELSEIF (CMAKE_C_COMPILER_ID AND NOT CMAKE_C_COMPILER_ID STREQUAL "Intel") FIND_LIBRARY(MKL_KERNEL_libirc "irc") ENDIF (CMAKE_COMPILER_IS_GNUCC) MARK_AS_ADVANCED(MKL_KERNEL_libirc) IF (MKL_KERNEL_libirc) SET(MKL_LIBRARIES ${MKL_LIBRARIES} ${MKL_KERNEL_libirc}) ENDIF (MKL_KERNEL_libirc) ENDIF (MKL_LIBRARIES) # Final SET(CMAKE_LIBRARY_PATH ${saved_CMAKE_LIBRARY_PATH}) SET(CMAKE_INCLUDE_PATH ${saved_CMAKE_INCLUDE_PATH}) IF (MKL_LIBRARIES) SET(MKL_FOUND TRUE) ELSE (MKL_LIBRARIES) SET(MKL_FOUND FALSE) SET(MKL_VERSION) ENDIF (MKL_LIBRARIES) # Standard termination IF(NOT MKL_FOUND AND MKL_FIND_REQUIRED) MESSAGE(FATAL_ERROR "MKL library not found. Please specify library location") ENDIF(NOT MKL_FOUND AND MKL_FIND_REQUIRED) IF(NOT MKL_FIND_QUIETLY) IF(MKL_FOUND) MESSAGE(STATUS "MKL library found") ELSE(MKL_FOUND) MESSAGE(STATUS "MKL library not found") ENDIF(MKL_FOUND) ENDIF(NOT MKL_FIND_QUIETLY) # Do nothing if MKL_FOUND was set before! 
ENDIF (NOT MKL_FOUND) lib/TH/cmake/FindSSE.cmake000066400000000000000000000053751316246254300154740ustar00rootroot00000000000000INCLUDE(CheckCSourceRuns) INCLUDE(CheckCXXSourceRuns) SET(SSE1_CODE " #include int main() { __m128 a; float vals[4] = {0,0,0,0}; a = _mm_loadu_ps(vals); return 0; }") SET(SSE2_CODE " #include int main() { __m128d a; double vals[2] = {0,0}; a = _mm_loadu_pd(vals); return 0; }") SET(SSE3_CODE " #include int main( ) { const int vals[4] = {0,0,0,0}; __m128i a; a = _mm_lddqu_si128( (const __m128i*)vals ); return 0; }") SET(SSE4_1_CODE " #include int main () { __m128i a = {0,0,0,0}, b = {0,0,0,0}; __m128i res = _mm_max_epi8(a, b); return 0; } ") SET(SSE4_2_CODE " #include int main() { __m128i a = {0,0,0,0}, b = {0,0,0,0}, c = {0,0,0,0}; c = _mm_cmpgt_epi64(a, b); return 0; } ") SET(AVX_CODE " #include int main() { __m256 a; a = _mm256_set1_ps(0); return 0; } ") SET(AVX2_CODE " #include int main() { __m256i a = {0}; a = _mm256_abs_epi16(a); return 0; } ") MACRO(CHECK_SSE lang type flags) SET(__FLAG_I 1) SET(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS}) FOREACH(__FLAG ${flags}) IF(NOT ${lang}_${type}_FOUND) SET(CMAKE_REQUIRED_FLAGS ${__FLAG}) IF(lang STREQUAL "CXX") CHECK_CXX_SOURCE_RUNS("${${type}_CODE}" ${lang}_HAS_${type}_${__FLAG_I}) ELSE() CHECK_C_SOURCE_RUNS("${${type}_CODE}" ${lang}_HAS_${type}_${__FLAG_I}) ENDIF() IF(${lang}_HAS_${type}_${__FLAG_I}) SET(${lang}_${type}_FOUND TRUE CACHE BOOL "${lang} ${type} support") SET(${lang}_${type}_FLAGS "${__FLAG}" CACHE STRING "${lang} ${type} flags") ENDIF() MATH(EXPR __FLAG_I "${__FLAG_I}+1") ENDIF() ENDFOREACH() SET(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE}) IF(NOT ${lang}_${type}_FOUND) SET(${lang}_${type}_FOUND FALSE CACHE BOOL "${lang} ${type} support") SET(${lang}_${type}_FLAGS "" CACHE STRING "${lang} ${type} flags") ENDIF() MARK_AS_ADVANCED(${lang}_${type}_FOUND ${lang}_${type}_FLAGS) ENDMACRO() CHECK_SSE(C "SSE1" " ;-msse;/arch:SSE") CHECK_SSE(C "SSE2" " ;-msse2;/arch:SSE2") 
CHECK_SSE(C "SSE3" " ;-msse3;/arch:SSE3") CHECK_SSE(C "SSE4_1" " ;-msse4.1;-msse4;/arch:SSE4") CHECK_SSE(C "SSE4_2" " ;-msse4.2;-msse4;/arch:SSE4") CHECK_SSE(C "AVX" " ;-mavx;/arch:AVX") CHECK_SSE(C "AVX2" " ;-mavx2 -mfma;/arch:AVX2") CHECK_SSE(CXX "SSE1" " ;-msse;/arch:SSE") CHECK_SSE(CXX "SSE2" " ;-msse2;/arch:SSE2") CHECK_SSE(CXX "SSE3" " ;-msse3;/arch:SSE3") CHECK_SSE(CXX "SSE4_1" " ;-msse4.1;-msse4;/arch:SSE4") CHECK_SSE(CXX "SSE4_2" " ;-msse4.2;-msse4;/arch:SSE4") CHECK_SSE(CXX "AVX" " ;-mavx;/arch:AVX") CHECK_SSE(CXX "AVX2" " ;-mavx2 -mfma;/arch:AVX2") lib/TH/generic/000077500000000000000000000000001316246254300135615ustar00rootroot00000000000000lib/TH/generic/THBlas.c000066400000000000000000000235121316246254300150450ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THBlas.c" #else #ifdef BLAS_F2C # define ffloat double #else # define ffloat float #endif TH_EXTERNC void dswap_(int *n, double *x, int *incx, double *y, int *incy); TH_EXTERNC void sswap_(int *n, float *x, int *incx, float *y, int *incy); TH_EXTERNC void dscal_(int *n, double *a, double *x, int *incx); TH_EXTERNC void sscal_(int *n, float *a, float *x, int *incx); TH_EXTERNC void dcopy_(int *n, double *x, int *incx, double *y, int *incy); TH_EXTERNC void scopy_(int *n, float *x, int *incx, float *y, int *incy); TH_EXTERNC void daxpy_(int *n, double *a, double *x, int *incx, double *y, int *incy); TH_EXTERNC void saxpy_(int *n, float *a, float *x, int *incx, float *y, int *incy); TH_EXTERNC double ddot_(int *n, double *x, int *incx, double *y, int *incy); #ifdef BLAS_USE_CBLAS_DOT TH_EXTERNC float cblas_sdot(const int n, const float *x, const int incx, const float *y, const int incy); #ifndef THBlas_C_sdot_ #define THBlas_C_sdot_ inline ffloat sdot_(const int *n, const float *x, const int *incx, const float *y, const int *incy) { return cblas_sdot(*n, x, *incx, y, *incy); } #endif #else TH_EXTERNC ffloat sdot_(int *n, float *x, int *incx, float *y, int *incy); 
#endif TH_EXTERNC void dgemv_(char *trans, int *m, int *n, double *alpha, double *a, int *lda, double *x, int *incx, double *beta, double *y, int *incy); TH_EXTERNC void sgemv_(char *trans, int *m, int *n, float *alpha, float *a, int *lda, float *x, int *incx, float *beta, float *y, int *incy); TH_EXTERNC void dger_(int *m, int *n, double *alpha, double *x, int *incx, double *y, int *incy, double *a, int *lda); TH_EXTERNC void sger_(int *m, int *n, float *alpha, float *x, int *incx, float *y, int *incy, float *a, int *lda); TH_EXTERNC void dgemm_(char *transa, char *transb, int *m, int *n, int *k, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc); TH_EXTERNC void sgemm_(char *transa, char *transb, int *m, int *n, int *k, float *alpha, float *a, int *lda, float *b, int *ldb, float *beta, float *c, int *ldc); void THBlas_(swap)(long n, real *x, long incx, real *y, long incy) { if(n == 1) { incx = 1; incy = 1; } #if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)) if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) ) { int i_n = (int)n; int i_incx = (int)incx; int i_incy = (int)incy; #if defined(TH_REAL_IS_DOUBLE) dswap_(&i_n, x, &i_incx, y, &i_incy); #else sswap_(&i_n, x, &i_incx, y, &i_incy); #endif return; } #endif { long i; for(i = 0; i < n; i++) { real z = x[i*incx]; x[i*incx] = y[i*incy]; y[i*incy] = z; } } } void THBlas_(scal)(long n, real a, real *x, long incx) { if(n == 1) incx = 1; #if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)) if( (n <= INT_MAX) && (incx <= INT_MAX) ) { int i_n = (int)n; int i_incx = (int)incx; #if defined(TH_REAL_IS_DOUBLE) dscal_(&i_n, &a, x, &i_incx); #else sscal_(&i_n, &a, x, &i_incx); #endif return; } #endif { long i; for(i = 0; i < n; i++) { if (a == 0) { x[i*incx] = 0; } else { x[i*incx] *= a; } } } } void THBlas_(copy)(long n, real *x, long incx, real *y, long incy) { if(n == 1) { incx = 1; incy = 1; } #if 
defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)) if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) ) { int i_n = (int)n; int i_incx = (int)incx; int i_incy = (int)incy; #if defined(TH_REAL_IS_DOUBLE) dcopy_(&i_n, x, &i_incx, y, &i_incy); #else scopy_(&i_n, x, &i_incx, y, &i_incy); #endif return; } #endif { long i; for(i = 0; i < n; i++) y[i*incy] = x[i*incx]; } } void THBlas_(axpy)(long n, real a, real *x, long incx, real *y, long incy) { if(n == 1) { incx = 1; incy = 1; } #if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)) if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) ) { int i_n = (int)n; int i_incx = (int)incx; int i_incy = (int)incy; #if defined(TH_REAL_IS_DOUBLE) daxpy_(&i_n, &a, x, &i_incx, y, &i_incy); #else saxpy_(&i_n, &a, x, &i_incx, y, &i_incy); #endif return; } #endif { long i; for(i = 0; i < n; i++) y[i*incy] += a*x[i*incx]; } } real THBlas_(dot)(long n, real *x, long incx, real *y, long incy) { if(n == 1) { incx = 1; incy = 1; } #if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)) if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) ) { int i_n = (int)n; int i_incx = (int)incx; int i_incy = (int)incy; #if defined(TH_REAL_IS_DOUBLE) return (real) ddot_(&i_n, x, &i_incx, y, &i_incy); #else return (real) sdot_(&i_n, x, &i_incx, y, &i_incy); #endif } #endif { long i; real sum = 0; for(i = 0; i < n; i++) sum += x[i*incx]*y[i*incy]; return sum; } } void THBlas_(gemv)(char trans, long m, long n, real alpha, real *a, long lda, real *x, long incx, real beta, real *y, long incy) { if(n == 1) lda = m; #if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)) if( (m <= INT_MAX) && (n <= INT_MAX) && (lda > 0) && (lda <= INT_MAX) && (incx > 0) && (incx <= INT_MAX) && (incy > 0) && (incy <= INT_MAX) ) { int i_m = (int)m; int i_n = (int)n; int i_lda = (int)lda; int i_incx = (int)incx; int i_incy = (int)incy; #if 
defined(TH_REAL_IS_DOUBLE) dgemv_(&trans, &i_m, &i_n, &alpha, a, &i_lda, x, &i_incx, &beta, y, &i_incy); #else sgemv_(&trans, &i_m, &i_n, &alpha, a, &i_lda, x, &i_incx, &beta, y, &i_incy); #endif return; } #endif { long i, j; if( (trans == 'T') || (trans == 't') ) { for(i = 0; i < n; i++) { real sum = 0; real *row_ = a+lda*i; for(j = 0; j < m; j++) sum += x[j*incx]*row_[j]; if (beta == 0) y[i*incy] = alpha*sum; else y[i*incy] = beta*y[i*incy] + alpha*sum; } } else { if(beta != 1) THBlas_(scal)(m, beta, y, incy); for(j = 0; j < n; j++) { real *column_ = a+lda*j; real z = alpha*x[j*incx]; for(i = 0; i < m; i++) y[i*incy] += z*column_[i]; } } } } void THBlas_(ger)(long m, long n, real alpha, real *x, long incx, real *y, long incy, real *a, long lda) { if(n == 1) lda = m; #if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)) if( (m <= INT_MAX) && (n <= INT_MAX) && (lda <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) ) { int i_m = (int)m; int i_n = (int)n; int i_lda = (int)lda; int i_incx = (int)incx; int i_incy = (int)incy; #if defined(TH_REAL_IS_DOUBLE) dger_(&i_m, &i_n, &alpha, x, &i_incx, y, &i_incy, a, &i_lda); #else sger_(&i_m, &i_n, &alpha, x, &i_incx, y, &i_incy, a, &i_lda); #endif return; } #endif { long i, j; for(j = 0; j < n; j++) { real *column_ = a+j*lda; real z = alpha*y[j*incy]; for(i = 0; i < m; i++) column_[i] += z*x[i*incx] ; } } } void THBlas_(gemm)(char transa, char transb, long m, long n, long k, real alpha, real *a, long lda, real *b, long ldb, real beta, real *c, long ldc) { int transa_ = ((transa == 't') || (transa == 'T')); int transb_ = ((transb == 't') || (transb == 'T')); if(n == 1) ldc = m; if(transa_) { if(m == 1) lda = k; } else { if(k == 1) lda = m; } if(transb_) { if(k == 1) ldb = n; } else { if(n == 1) ldb = k; } #if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)) if( (m <= INT_MAX) && (n <= INT_MAX) && (k <= INT_MAX) && (lda <= INT_MAX) && (ldb <= INT_MAX) && (ldc <= 
INT_MAX) ) { int i_m = (int)m; int i_n = (int)n; int i_k = (int)k; int i_lda = (int)lda; int i_ldb = (int)ldb; int i_ldc = (int)ldc; #if defined(TH_REAL_IS_DOUBLE) dgemm_(&transa, &transb, &i_m, &i_n, &i_k, &alpha, a, &i_lda, b, &i_ldb, &beta, c, &i_ldc); #else sgemm_(&transa, &transb, &i_m, &i_n, &i_k, &alpha, a, &i_lda, b, &i_ldb, &beta, c, &i_ldc); #endif return; } #endif { long i, j, l; if(!transa_ && !transb_) { real *a_ = a; for(i = 0; i < m; i++) { real *b_ = b; for(j = 0; j < n; j++) { real sum = 0; for(l = 0; l < k; l++) sum += a_[l*lda]*b_[l]; b_ += ldb; if (beta == 0) c[j*ldc+i] = alpha*sum; else c[j*ldc+i] = beta*c[j*ldc+i]+alpha*sum; } a_++; } } else if(transa_ && !transb_) { real *a_ = a; for(i = 0; i < m; i++) { real *b_ = b; for(j = 0; j < n; j++) { real sum = 0; for(l = 0; l < k; l++) sum += a_[l]*b_[l]; b_ += ldb; if (beta == 0) c[j*ldc+i] = alpha*sum; else c[j*ldc+i] = beta*c[j*ldc+i]+alpha*sum; } a_ += lda; } } else if(!transa_ && transb_) { real *a_ = a; for(i = 0; i < m; i++) { real *b_ = b; for(j = 0; j < n; j++) { real sum = 0; for(l = 0; l < k; l++) sum += a_[l*lda]*b_[l*ldb]; b_++; if (beta == 0) c[j*ldc+i] = alpha*sum; else c[j*ldc+i] = beta*c[j*ldc+i]+alpha*sum; } a_++; } } else { real *a_ = a; for(i = 0; i < m; i++) { real *b_ = b; for(j = 0; j < n; j++) { real sum = 0; for(l = 0; l < k; l++) sum += a_[l]*b_[l*ldb]; b_++; if (beta == 0) c[j*ldc+i] = alpha*sum; else c[j*ldc+i] = beta*c[j*ldc+i]+alpha*sum; } a_ += lda; } } } } #endif lib/TH/generic/THBlas.h000066400000000000000000000016071316246254300150530ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THBlas.h" #else /* Level 1 */ TH_API void THBlas_(swap)(long n, real *x, long incx, real *y, long incy); TH_API void THBlas_(scal)(long n, real a, real *x, long incx); TH_API void THBlas_(copy)(long n, real *x, long incx, real *y, long incy); TH_API void THBlas_(axpy)(long n, real a, real *x, long incx, real *y, long incy); TH_API real 
THBlas_(dot)(long n, real *x, long incx, real *y, long incy); /* Level 2 */ TH_API void THBlas_(gemv)(char trans, long m, long n, real alpha, real *a, long lda, real *x, long incx, real beta, real *y, long incy); TH_API void THBlas_(ger)(long m, long n, real alpha, real *x, long incx, real *y, long incy, real *a, long lda); /* Level 3 */ TH_API void THBlas_(gemm)(char transa, char transb, long m, long n, long k, real alpha, real *a, long lda, real *b, long ldb, real beta, real *c, long ldc); #endif lib/TH/generic/THLapack.c000066400000000000000000000254371316246254300153670ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THLapack.c" #else TH_EXTERNC void dgesv_(int *n, int *nrhs, double *a, int *lda, int *ipiv, double *b, int *ldb, int *info); TH_EXTERNC void sgesv_(int *n, int *nrhs, float *a, int *lda, int *ipiv, float *b, int *ldb, int *info); TH_EXTERNC void dtrtrs_(char *uplo, char *trans, char *diag, int *n, int *nrhs, double *a, int *lda, double *b, int *ldb, int *info); TH_EXTERNC void strtrs_(char *uplo, char *trans, char *diag, int *n, int *nrhs, float *a, int *lda, float *b, int *ldb, int *info); TH_EXTERNC void dgels_(char *trans, int *m, int *n, int *nrhs, double *a, int *lda, double *b, int *ldb, double *work, int *lwork, int *info); TH_EXTERNC void sgels_(char *trans, int *m, int *n, int *nrhs, float *a, int *lda, float *b, int *ldb, float *work, int *lwork, int *info); TH_EXTERNC void dsyev_(char *jobz, char *uplo, int *n, double *a, int *lda, double *w, double *work, int *lwork, int *info); TH_EXTERNC void ssyev_(char *jobz, char *uplo, int *n, float *a, int *lda, float *w, float *work, int *lwork, int *info); TH_EXTERNC void dgeev_(char *jobvl, char *jobvr, int *n, double *a, int *lda, double *wr, double *wi, double* vl, int *ldvl, double *vr, int *ldvr, double *work, int *lwork, int *info); TH_EXTERNC void sgeev_(char *jobvl, char *jobvr, int *n, float *a, int *lda, float *wr, float *wi, float* vl, int *ldvl, 
float *vr, int *ldvr, float *work, int *lwork, int *info); TH_EXTERNC void dgesvd_(char *jobu, char *jobvt, int *m, int *n, double *a, int *lda, double *s, double *u, int *ldu, double *vt, int *ldvt, double *work, int *lwork, int *info); TH_EXTERNC void sgesvd_(char *jobu, char *jobvt, int *m, int *n, float *a, int *lda, float *s, float *u, int *ldu, float *vt, int *ldvt, float *work, int *lwork, int *info); TH_EXTERNC void dgetrf_(int *m, int *n, double *a, int *lda, int *ipiv, int *info); TH_EXTERNC void sgetrf_(int *m, int *n, float *a, int *lda, int *ipiv, int *info); TH_EXTERNC void dgetrs_(char *trans, int *n, int *nrhs, double *a, int *lda, int *ipiv, double *b, int *ldb, int *info); TH_EXTERNC void sgetrs_(char *trans, int *n, int *nrhs, float *a, int *lda, int *ipiv, float *b, int *ldb, int *info); TH_EXTERNC void dgetri_(int *n, double *a, int *lda, int *ipiv, double *work, int *lwork, int *info); TH_EXTERNC void sgetri_(int *n, float *a, int *lda, int *ipiv, float *work, int *lwork, int *info); TH_EXTERNC void dpotrf_(char *uplo, int *n, double *a, int *lda, int *info); TH_EXTERNC void spotrf_(char *uplo, int *n, float *a, int *lda, int *info); TH_EXTERNC void dpotri_(char *uplo, int *n, double *a, int *lda, int *info); TH_EXTERNC void spotri_(char *uplo, int *n, float *a, int *lda, int *info); TH_EXTERNC void dpotrs_(char *uplo, int *n, int *nrhs, double *a, int *lda, double *b, int *ldb, int *info); TH_EXTERNC void spotrs_(char *uplo, int *n, int *nrhs, float *a, int *lda, float *b, int *ldb, int *info); TH_EXTERNC void sgeqrf_(int *m, int *n, float *a, int *lda, float *tau, float *work, int *lwork, int *info); TH_EXTERNC void dgeqrf_(int *m, int *n, double *a, int *lda, double *tau, double *work, int *lwork, int *info); TH_EXTERNC void sorgqr_(int *m, int *n, int *k, float *a, int *lda, float *tau, float *work, int *lwork, int *info); TH_EXTERNC void dorgqr_(int *m, int *n, int *k, double *a, int *lda, double *tau, double *work, int *lwork, int 
*info); TH_EXTERNC void sormqr_(char *side, char *trans, int *m, int *n, int *k, float *a, int *lda, float *tau, float *c, int *ldc, float *work, int *lwork, int *info); TH_EXTERNC void dormqr_(char *side, char *trans, int *m, int *n, int *k, double *a, int *lda, double *tau, double *c, int *ldc, double *work, int *lwork, int *info); TH_EXTERNC void spstrf_(char *uplo, int *n, float *a, int *lda, int *piv, int *rank, float *tol, float *work, int *info); TH_EXTERNC void dpstrf_(char *uplo, int *n, double *a, int *lda, int *piv, int *rank, double *tol, double *work, int *info); /* Compute the solution to a real system of linear equations A * X = B */ void THLapack_(gesv)(int n, int nrhs, real *a, int lda, int *ipiv, real *b, int ldb, int* info) { #ifdef USE_LAPACK #if defined(TH_REAL_IS_DOUBLE) dgesv_(&n, &nrhs, a, &lda, ipiv, b, &ldb, info); #else sgesv_(&n, &nrhs, a, &lda, ipiv, b, &ldb, info); #endif #else THError("gesv : Lapack library not found in compile time\n"); #endif return; } /* Solve a triangular system of the form A * X = B or A^T * X = B */ void THLapack_(trtrs)(char uplo, char trans, char diag, int n, int nrhs, real *a, int lda, real *b, int ldb, int* info) { #ifdef USE_LAPACK #if defined(TH_REAL_IS_DOUBLE) dtrtrs_(&uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, info); #else strtrs_(&uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, info); #endif #else THError("trtrs : Lapack library not found in compile time\n"); #endif return; } /* Solve overdetermined or underdetermined real linear systems involving an M-by-N matrix A, or its transpose, using a QR or LQ factorization of A */ void THLapack_(gels)(char trans, int m, int n, int nrhs, real *a, int lda, real *b, int ldb, real *work, int lwork, int *info) { #ifdef USE_LAPACK #if defined(TH_REAL_IS_DOUBLE) dgels_(&trans, &m, &n, &nrhs, a, &lda, b, &ldb, work, &lwork, info); #else sgels_(&trans, &m, &n, &nrhs, a, &lda, b, &ldb, work, &lwork, info); #endif #else THError("gels : Lapack library not found in 
compile time\n"); #endif } /* Compute all eigenvalues and, optionally, eigenvectors of a real symmetric matrix A */ void THLapack_(syev)(char jobz, char uplo, int n, real *a, int lda, real *w, real *work, int lwork, int *info) { #ifdef USE_LAPACK #if defined(TH_REAL_IS_DOUBLE) dsyev_(&jobz, &uplo, &n, a, &lda, w, work, &lwork, info); #else ssyev_(&jobz, &uplo, &n, a, &lda, w, work, &lwork, info); #endif #else THError("syev : Lapack library not found in compile time\n"); #endif } /* Compute for an N-by-N real nonsymmetric matrix A, the eigenvalues and, optionally, the left and/or right eigenvectors */ void THLapack_(geev)(char jobvl, char jobvr, int n, real *a, int lda, real *wr, real *wi, real* vl, int ldvl, real *vr, int ldvr, real *work, int lwork, int *info) { #ifdef USE_LAPACK #if defined(TH_REAL_IS_DOUBLE) dgeev_(&jobvl, &jobvr, &n, a, &lda, wr, wi, vl, &ldvl, vr, &ldvr, work, &lwork, info); #else sgeev_(&jobvl, &jobvr, &n, a, &lda, wr, wi, vl, &ldvl, vr, &ldvr, work, &lwork, info); #endif #else THError("geev : Lapack library not found in compile time\n"); #endif } /* Compute the singular value decomposition (SVD) of a real M-by-N matrix A, optionally computing the left and/or right singular vectors */ void THLapack_(gesvd)(char jobu, char jobvt, int m, int n, real *a, int lda, real *s, real *u, int ldu, real *vt, int ldvt, real *work, int lwork, int *info) { #ifdef USE_LAPACK #if defined(TH_REAL_IS_DOUBLE) dgesvd_( &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, info); #else sgesvd_( &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, info); #endif #else THError("gesvd : Lapack library not found in compile time\n"); #endif } /* LU decomposition */ void THLapack_(getrf)(int m, int n, real *a, int lda, int *ipiv, int *info) { #ifdef USE_LAPACK #if defined(TH_REAL_IS_DOUBLE) dgetrf_(&m, &n, a, &lda, ipiv, info); #else sgetrf_(&m, &n, a, &lda, ipiv, info); #endif #else THError("getrf : Lapack library not found in compile 
time\n"); #endif } void THLapack_(getrs)(char trans, int n, int nrhs, real *a, int lda, int *ipiv, real *b, int ldb, int *info) { #ifdef USE_LAPACK #if defined(TH_REAL_IS_DOUBLE) dgetrs_(&trans, &n, &nrhs, a, &lda, ipiv, b, &ldb, info); #else sgetrs_(&trans, &n, &nrhs, a, &lda, ipiv, b, &ldb, info); #endif #else THError("getrs : Lapack library not found in compile time\n"); #endif } /* Matrix Inverse */ void THLapack_(getri)(int n, real *a, int lda, int *ipiv, real *work, int lwork, int* info) { #ifdef USE_LAPACK #if defined(TH_REAL_IS_DOUBLE) dgetri_(&n, a, &lda, ipiv, work, &lwork, info); #else sgetri_(&n, a, &lda, ipiv, work, &lwork, info); #endif #else THError("getri : Lapack library not found in compile time\n"); #endif } /* Cholesky factorization */ void THLapack_(potrf)(char uplo, int n, real *a, int lda, int *info) { #ifdef USE_LAPACK #if defined(TH_REAL_IS_DOUBLE) dpotrf_(&uplo, &n, a, &lda, info); #else spotrf_(&uplo, &n, a, &lda, info); #endif #else THError("potrf : Lapack library not found in compile time\n"); #endif } /* Solve A*X = B with a symmetric positive definite matrix A using the Cholesky factorization */ void THLapack_(potrs)(char uplo, int n, int nrhs, real *a, int lda, real *b, int ldb, int *info) { #ifdef USE_LAPACK #if defined(TH_REAL_IS_DOUBLE) dpotrs_(&uplo, &n, &nrhs, a, &lda, b, &ldb, info); #else spotrs_(&uplo, &n, &nrhs, a, &lda, b, &ldb, info); #endif #else THError("potrs: Lapack library not found in compile time\n"); #endif } /* Cholesky factorization based Matrix Inverse */ void THLapack_(potri)(char uplo, int n, real *a, int lda, int *info) { #ifdef USE_LAPACK #if defined(TH_REAL_IS_DOUBLE) dpotri_(&uplo, &n, a, &lda, info); #else spotri_(&uplo, &n, a, &lda, info); #endif #else THError("potri: Lapack library not found in compile time\n"); #endif } /* Cholesky factorization with complete pivoting */ void THLapack_(pstrf)(char uplo, int n, real *a, int lda, int *piv, int *rank, real tol, real *work, int *info) { #ifdef USE_LAPACK 
#if defined(TH_REAL_IS_DOUBLE) dpstrf_(&uplo, &n, a, &lda, piv, rank, &tol, work, info); #else spstrf_(&uplo, &n, a, &lda, piv, rank, &tol, work, info); #endif #else THError("pstrf: Lapack library not found at compile time\n"); #endif } /* QR decomposition */ void THLapack_(geqrf)(int m, int n, real *a, int lda, real *tau, real *work, int lwork, int *info) { #ifdef USE_LAPACK #if defined(TH_REAL_IS_DOUBLE) dgeqrf_(&m, &n, a, &lda, tau, work, &lwork, info); #else sgeqrf_(&m, &n, a, &lda, tau, work, &lwork, info); #endif #else THError("geqrf: Lapack library not found in compile time\n"); #endif } /* Build Q from output of geqrf */ void THLapack_(orgqr)(int m, int n, int k, real *a, int lda, real *tau, real *work, int lwork, int *info) { #ifdef USE_LAPACK #if defined(TH_REAL_IS_DOUBLE) dorgqr_(&m, &n, &k, a, &lda, tau, work, &lwork, info); #else sorgqr_(&m, &n, &k, a, &lda, tau, work, &lwork, info); #endif #else THError("orgqr: Lapack library not found in compile time\n"); #endif } /* Multiply Q with a matrix using the output of geqrf */ void THLapack_(ormqr)(char side, char trans, int m, int n, int k, real *a, int lda, real *tau, real *c, int ldc, real *work, int lwork, int *info) { #ifdef USE_LAPACK #if defined(TH_REAL_IS_DOUBLE) dormqr_(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, info); #else sormqr_(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, info); #endif #else THError("ormqr: Lapack library not found in compile time\n"); #endif } #endif lib/TH/generic/THLapack.h000066400000000000000000000046621316246254300153710ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THLapack.h" #else /* AX=B */ TH_API void THLapack_(gesv)(int n, int nrhs, real *a, int lda, int *ipiv, real *b, int ldb, int* info); /* Solve a triangular system of the form A * X = B or A^T * X = B */ TH_API void THLapack_(trtrs)(char uplo, char trans, char diag, int n, int nrhs, real *a, int lda, real *b, int ldb, int* info); /* 
||AX-B|| */ TH_API void THLapack_(gels)(char trans, int m, int n, int nrhs, real *a, int lda, real *b, int ldb, real *work, int lwork, int *info); /* Eigenvals */ TH_API void THLapack_(syev)(char jobz, char uplo, int n, real *a, int lda, real *w, real *work, int lwork, int *info); /* Non-sym eigenvals */ TH_API void THLapack_(geev)(char jobvl, char jobvr, int n, real *a, int lda, real *wr, real *wi, real* vl, int ldvl, real *vr, int ldvr, real *work, int lwork, int *info); /* svd */ TH_API void THLapack_(gesvd)(char jobu, char jobvt, int m, int n, real *a, int lda, real *s, real *u, int ldu, real *vt, int ldvt, real *work, int lwork, int *info); /* LU decomposition */ TH_API void THLapack_(getrf)(int m, int n, real *a, int lda, int *ipiv, int *info); TH_API void THLapack_(getrs)(char trans, int n, int nrhs, real *a, int lda, int *ipiv, real *b, int ldb, int *info); /* Matrix Inverse */ TH_API void THLapack_(getri)(int n, real *a, int lda, int *ipiv, real *work, int lwork, int* info); /* Positive Definite matrices */ /* Cholesky factorization */ void THLapack_(potrf)(char uplo, int n, real *a, int lda, int *info); /* Matrix inverse based on Cholesky factorization */ void THLapack_(potri)(char uplo, int n, real *a, int lda, int *info); /* Solve A*X = B with a symmetric positive definite matrix A using the Cholesky factorization */ void THLapack_(potrs)(char uplo, int n, int nrhs, real *a, int lda, real *b, int ldb, int *info); /* Cholesky factorization with complete pivoting. 
*/ void THLapack_(pstrf)(char uplo, int n, real *a, int lda, int *piv, int *rank, real tol, real *work, int *info); /* QR decomposition */ void THLapack_(geqrf)(int m, int n, real *a, int lda, real *tau, real *work, int lwork, int *info); /* Build Q from output of geqrf */ void THLapack_(orgqr)(int m, int n, int k, real *a, int lda, real *tau, real *work, int lwork, int *info); /* Multiply Q with a matrix from output of geqrf */ void THLapack_(ormqr)(char side, char trans, int m, int n, int k, real *a, int lda, real *tau, real *c, int ldc, real *work, int lwork, int *info); #endif lib/TH/generic/THStorage.c000066400000000000000000000133531316246254300155720ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THStorage.c" #else real* THStorage_(data)(const THStorage *self) { return self->data; } ptrdiff_t THStorage_(size)(const THStorage *self) { return self->size; } size_t THStorage_(elementSize)() { return sizeof(real); } THStorage* THStorage_(new)(void) { return THStorage_(newWithSize)(0); } THStorage* THStorage_(newWithSize)(ptrdiff_t size) { return THStorage_(newWithAllocator)(size, &THDefaultAllocator, NULL); } THStorage* THStorage_(newWithAllocator)(ptrdiff_t size, THAllocator *allocator, void *allocatorContext) { THStorage *storage = THAlloc(sizeof(THStorage)); storage->data = allocator->malloc(allocatorContext, sizeof(real)*size); storage->size = size; storage->refcount = 1; storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM; storage->allocator = allocator; storage->allocatorContext = allocatorContext; return storage; } THStorage* THStorage_(newWithMapping)(const char *filename, ptrdiff_t size, int flags) { THMapAllocatorContext *ctx = THMapAllocatorContext_new(filename, flags); THStorage *storage = THStorage_(newWithAllocator)(size, &THMapAllocator, ctx); if(size <= 0) storage->size = THMapAllocatorContext_size(ctx)/sizeof(real); THStorage_(clearFlag)(storage, TH_STORAGE_RESIZABLE); return 
storage; } THStorage* THStorage_(newWithSize1)(real data0) { THStorage *self = THStorage_(newWithSize)(1); self->data[0] = data0; return self; } THStorage* THStorage_(newWithSize2)(real data0, real data1) { THStorage *self = THStorage_(newWithSize)(2); self->data[0] = data0; self->data[1] = data1; return self; } THStorage* THStorage_(newWithSize3)(real data0, real data1, real data2) { THStorage *self = THStorage_(newWithSize)(3); self->data[0] = data0; self->data[1] = data1; self->data[2] = data2; return self; } THStorage* THStorage_(newWithSize4)(real data0, real data1, real data2, real data3) { THStorage *self = THStorage_(newWithSize)(4); self->data[0] = data0; self->data[1] = data1; self->data[2] = data2; self->data[3] = data3; return self; } void THStorage_(setFlag)(THStorage *storage, const char flag) { storage->flag |= flag; } void THStorage_(clearFlag)(THStorage *storage, const char flag) { storage->flag &= ~flag; } void THStorage_(retain)(THStorage *storage) { if(storage && (storage->flag & TH_STORAGE_REFCOUNTED)) THAtomicIncrementRef(&storage->refcount); } void THStorage_(free)(THStorage *storage) { if(!storage) return; if((storage->flag & TH_STORAGE_REFCOUNTED) && (THAtomicGet(&storage->refcount) > 0)) { if(THAtomicDecrementRef(&storage->refcount)) { if(storage->flag & TH_STORAGE_FREEMEM) { storage->allocator->free(storage->allocatorContext, storage->data); } if(storage->flag & TH_STORAGE_VIEW) { THStorage_(free)(storage->view); } THFree(storage); } } } THStorage* THStorage_(newWithData)(real *data, ptrdiff_t size) { return THStorage_(newWithDataAndAllocator)(data, size, &THDefaultAllocator, NULL); } THStorage* THStorage_(newWithDataAndAllocator)(real* data, ptrdiff_t size, THAllocator* allocator, void* allocatorContext) { THStorage *storage = THAlloc(sizeof(THStorage)); storage->data = data; storage->size = size; storage->refcount = 1; storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM; storage->allocator = allocator; 
storage->allocatorContext = allocatorContext; return storage; } void THStorage_(resize)(THStorage *storage, ptrdiff_t size) { if(storage->flag & TH_STORAGE_RESIZABLE) { if(storage->allocator->realloc == NULL) { /* case when the allocator does not have a realloc defined */ real *old_data = storage->data; ptrdiff_t old_size = storage->size; if (size == 0) { storage->data = NULL; } else { storage->data = storage->allocator->malloc( storage->allocatorContext, sizeof(real)*size); } storage->size = size; if (old_data != NULL) { ptrdiff_t copy_size = old_size; if (storage->size < copy_size) { copy_size = storage->size; } if (copy_size > 0) { memcpy(storage->data, old_data, sizeof(real)*copy_size); } storage->allocator->free(storage->allocatorContext, old_data); } } else { storage->data = storage->allocator->realloc( storage->allocatorContext, storage->data, sizeof(real)*size); storage->size = size; } } else { THError("Trying to resize storage that is not resizable"); } } void THStorage_(fill)(THStorage *storage, real value) { ptrdiff_t i; for(i = 0; i < storage->size; i++) storage->data[i] = value; } void THStorage_(set)(THStorage *self, ptrdiff_t idx, real value) { THArgCheck((idx >= 0) && (idx < self->size), 2, "out of bounds"); self->data[idx] = value; } real THStorage_(get)(const THStorage *self, ptrdiff_t idx) { THArgCheck((idx >= 0) && (idx < self->size), 2, "out of bounds"); return self->data[idx]; } void THStorage_(swap)(THStorage *storage1, THStorage *storage2) { #define SWAP(val) { val = storage1->val; storage1->val = storage2->val; storage2->val = val; } real *data; ptrdiff_t size; char flag; THAllocator *allocator; void *allocatorContext; struct THStorage *view; SWAP(data); SWAP(size); SWAP(flag); // don't swap refcount! 
SWAP(allocator); SWAP(allocatorContext); SWAP(view); #undef SWAP } #endif lib/TH/generic/THStorage.h000066400000000000000000000047731316246254300156050ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THStorage.h" #else /* on pourrait avoir un liste chainee qui initialise math, lab structures (or more). mouais -- complique. Pb: THMapStorage is kind of a class THLab_()... comment je m'en sors? en template, faudrait que je les instancie toutes!!! oh boy! Et comment je sais que c'est pour Cuda? Le type float est le meme dans les <> au bout du compte, ca serait sur des pointeurs float/double... etc... = facile. primitives?? */ #define TH_STORAGE_REFCOUNTED 1 #define TH_STORAGE_RESIZABLE 2 #define TH_STORAGE_FREEMEM 4 #define TH_STORAGE_VIEW 8 typedef struct THStorage { real *data; ptrdiff_t size; int refcount; char flag; THAllocator *allocator; void *allocatorContext; struct THStorage *view; } THStorage; TH_API real* THStorage_(data)(const THStorage*); TH_API ptrdiff_t THStorage_(size)(const THStorage*); TH_API size_t THStorage_(elementSize)(void); /* slow access -- checks everything */ TH_API void THStorage_(set)(THStorage*, ptrdiff_t, real); TH_API real THStorage_(get)(const THStorage*, ptrdiff_t); TH_API THStorage* THStorage_(new)(void); TH_API THStorage* THStorage_(newWithSize)(ptrdiff_t size); TH_API THStorage* THStorage_(newWithSize1)(real); TH_API THStorage* THStorage_(newWithSize2)(real, real); TH_API THStorage* THStorage_(newWithSize3)(real, real, real); TH_API THStorage* THStorage_(newWithSize4)(real, real, real, real); TH_API THStorage* THStorage_(newWithMapping)(const char *filename, ptrdiff_t size, int flags); /* takes ownership of data */ TH_API THStorage* THStorage_(newWithData)(real *data, ptrdiff_t size); TH_API THStorage* THStorage_(newWithAllocator)(ptrdiff_t size, THAllocator* allocator, void *allocatorContext); TH_API THStorage* THStorage_(newWithDataAndAllocator)( real* data, ptrdiff_t size, THAllocator* 
allocator, void *allocatorContext); /* should not differ with API */ TH_API void THStorage_(setFlag)(THStorage *storage, const char flag); TH_API void THStorage_(clearFlag)(THStorage *storage, const char flag); TH_API void THStorage_(retain)(THStorage *storage); TH_API void THStorage_(swap)(THStorage *storage1, THStorage *storage2); /* might differ with other API (like CUDA) */ TH_API void THStorage_(free)(THStorage *storage); TH_API void THStorage_(resize)(THStorage *storage, ptrdiff_t size); TH_API void THStorage_(fill)(THStorage *storage, real value); #endif lib/TH/generic/THStorageCopy.c000066400000000000000000000046071316246254300164270ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THStorageCopy.c" #else void THStorage_(rawCopy)(THStorage *storage, real *src) { ptrdiff_t i; for(i = 0; i < storage->size; i++) storage->data[i] = src[i]; } void THStorage_(copy)(THStorage *storage, THStorage *src) { THArgCheck(storage->size == src->size, 2, "size mismatch"); THStorage_(rawCopy)(storage, src->data); } #define IMPLEMENT_THStorage_COPY(TYPENAMESRC) \ void THStorage_(copy##TYPENAMESRC)(THStorage *storage, TH##TYPENAMESRC##Storage *src) \ { \ ptrdiff_t i; \ for(i = 0; i < storage->size; i++) \ storage->data[i] = (real)src->data[i]; \ } #define IMPLEMENT_THStorage_COPY_FROM_HALF(TYPENAMESRC) \ void THStorage_(copy##TYPENAMESRC)(THStorage *storage, TH##TYPENAMESRC##Storage *src) \ { \ THArgCheck(storage->size == src->size, 2, "size mismatch"); \ ptrdiff_t i; \ for(i = 0; i < storage->size; i++) \ storage->data[i] = (real)TH_half2float(src->data[i]); \ } #define IMPLEMENT_THStorage_COPY_TO_HALF(TYPENAMESRC) \ void THStorage_(copy##TYPENAMESRC)(THStorage *storage, TH##TYPENAMESRC##Storage *src) \ { \ THArgCheck(storage->size == src->size, 2, "size mismatch"); \ ptrdiff_t i; \ for(i = 0; i < storage->size; i++) \ storage->data[i] = TH_float2half((float)(src->data[i])); \ } #define IMPLEMENT_THStorage_COPY_TO_FROM_HALF(TYPENAMESRC) \ 
void THStorage_(copy##TYPENAMESRC)(THStorage *storage, TH##TYPENAMESRC##Storage *src) \ { \ THArgCheck(storage->size == src->size, 2, "size mismatch"); \ ptrdiff_t i; \ for(i = 0; i < storage->size; i++) \ storage->data[i] = src->data[i]; \ } #ifndef TH_REAL_IS_HALF IMPLEMENT_THStorage_COPY(Byte) IMPLEMENT_THStorage_COPY(Char) IMPLEMENT_THStorage_COPY(Short) IMPLEMENT_THStorage_COPY(Int) IMPLEMENT_THStorage_COPY(Long) IMPLEMENT_THStorage_COPY(Float) IMPLEMENT_THStorage_COPY(Double) IMPLEMENT_THStorage_COPY_FROM_HALF(Half) #else /* only allow pass-through for Half */ IMPLEMENT_THStorage_COPY_TO_FROM_HALF(Half) IMPLEMENT_THStorage_COPY_TO_HALF(Byte) IMPLEMENT_THStorage_COPY_TO_HALF(Char) IMPLEMENT_THStorage_COPY_TO_HALF(Short) IMPLEMENT_THStorage_COPY_TO_HALF(Int) IMPLEMENT_THStorage_COPY_TO_HALF(Long) IMPLEMENT_THStorage_COPY_TO_HALF(Float) IMPLEMENT_THStorage_COPY_TO_HALF(Double) #endif #endif lib/TH/generic/THStorageCopy.h000066400000000000000000000016411316246254300164270ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THStorageCopy.h" #else /* Support for copy between different Storage types */ TH_API void THStorage_(rawCopy)(THStorage *storage, real *src); TH_API void THStorage_(copy)(THStorage *storage, THStorage *src); TH_API void THStorage_(copyByte)(THStorage *storage, struct THByteStorage *src); TH_API void THStorage_(copyChar)(THStorage *storage, struct THCharStorage *src); TH_API void THStorage_(copyShort)(THStorage *storage, struct THShortStorage *src); TH_API void THStorage_(copyInt)(THStorage *storage, struct THIntStorage *src); TH_API void THStorage_(copyLong)(THStorage *storage, struct THLongStorage *src); TH_API void THStorage_(copyFloat)(THStorage *storage, struct THFloatStorage *src); TH_API void THStorage_(copyDouble)(THStorage *storage, struct THDoubleStorage *src); TH_API void THStorage_(copyHalf)(THStorage *storage, struct THHalfStorage *src); #endif 
lib/TH/generic/THTensor.c000066400000000000000000000645551316246254300154520ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THTensor.c" #else /**** access methods ****/ THStorage *THTensor_(storage)(const THTensor *self) { return self->storage; } ptrdiff_t THTensor_(storageOffset)(const THTensor *self) { return self->storageOffset; } int THTensor_(nDimension)(const THTensor *self) { return self->nDimension; } long THTensor_(size)(const THTensor *self, int dim) { THArgCheck((dim >= 0) && (dim < self->nDimension), 2, "dimension %d out of range of %dD tensor", dim+TH_INDEX_BASE, THTensor_(nDimension)(self)); return self->size[dim]; } long THTensor_(stride)(const THTensor *self, int dim) { THArgCheck((dim >= 0) && (dim < self->nDimension), 2, "dimension %d out of range of %dD tensor", dim+TH_INDEX_BASE, THTensor_(nDimension)(self)); return self->stride[dim]; } THLongStorage *THTensor_(newSizeOf)(THTensor *self) { THLongStorage *size = THLongStorage_newWithSize(self->nDimension); THLongStorage_rawCopy(size, self->size); return size; } THLongStorage *THTensor_(newStrideOf)(THTensor *self) { THLongStorage *stride = THLongStorage_newWithSize(self->nDimension); THLongStorage_rawCopy(stride, self->stride); return stride; } real *THTensor_(data)(const THTensor *self) { if(self->storage) return (self->storage->data+self->storageOffset); else return NULL; } void THTensor_(setFlag)(THTensor *self, const char flag) { self->flag |= flag; } void THTensor_(clearFlag)(THTensor *self, const char flag) { self->flag &= ~flag; } /**** creation methods ****/ static void THTensor_(rawInit)(THTensor *self); /* Empty init */ THTensor *THTensor_(new)(void) { THTensor *self = THAlloc(sizeof(THTensor)); THTensor_(rawInit)(self); return self; } /* Pointer-copy init */ THTensor *THTensor_(newWithTensor)(THTensor *tensor) { THTensor *self = THAlloc(sizeof(THTensor)); THTensor_(rawInit)(self); THTensor_(setStorageNd)(self, tensor->storage, 
tensor->storageOffset, tensor->nDimension, tensor->size, tensor->stride); return self; } /* Storage init */ THTensor *THTensor_(newWithStorage)(THStorage *storage, ptrdiff_t storageOffset, THLongStorage *size, THLongStorage *stride) { THTensor *self = THAlloc(sizeof(THTensor)); if(size && stride) THArgCheck(size->size == stride->size, 4, "inconsistent size"); THTensor_(rawInit)(self); #ifdef DEBUG THAssert((size ? size->size : (stride ? stride->size : 0)) <= INT_MAX); #endif THTensor_(setStorageNd)(self, storage, storageOffset, (size ? size->size : (stride ? stride->size : 0)), (size ? size->data : NULL), (stride ? stride->data : NULL)); return self; } THTensor *THTensor_(newWithStorage1d)(THStorage *storage, ptrdiff_t storageOffset, long size0, long stride0) { return THTensor_(newWithStorage4d)(storage, storageOffset, size0, stride0, -1, -1, -1, -1, -1, -1); } THTensor *THTensor_(newWithStorage2d)(THStorage *storage, ptrdiff_t storageOffset, long size0, long stride0, long size1, long stride1) { return THTensor_(newWithStorage4d)(storage, storageOffset, size0, stride0, size1, stride1, -1, -1, -1, -1); } THTensor *THTensor_(newWithStorage3d)(THStorage *storage, ptrdiff_t storageOffset, long size0, long stride0, long size1, long stride1, long size2, long stride2) { return THTensor_(newWithStorage4d)(storage, storageOffset, size0, stride0, size1, stride1, size2, stride2, -1, -1); } THTensor *THTensor_(newWithStorage4d)(THStorage *storage, ptrdiff_t storageOffset, long size0, long stride0, long size1, long stride1, long size2, long stride2, long size3, long stride3) { long size[4] = {size0, size1, size2, size3}; long stride[4] = {stride0, stride1, stride2, stride3}; THTensor *self = THAlloc(sizeof(THTensor)); THTensor_(rawInit)(self); THTensor_(setStorageNd)(self, storage, storageOffset, 4, size, stride); return self; } THTensor *THTensor_(newWithSize)(THLongStorage *size, THLongStorage *stride) { return THTensor_(newWithStorage)(NULL, 0, size, stride); } THTensor 
*THTensor_(newWithSize1d)(long size0) { return THTensor_(newWithSize4d)(size0, -1, -1, -1); } THTensor *THTensor_(newWithSize2d)(long size0, long size1) { return THTensor_(newWithSize4d)(size0, size1, -1, -1); } THTensor *THTensor_(newWithSize3d)(long size0, long size1, long size2) { return THTensor_(newWithSize4d)(size0, size1, size2, -1); } THTensor *THTensor_(newWithSize4d)(long size0, long size1, long size2, long size3) { long size[4] = {size0, size1, size2, size3}; THTensor *self = THAlloc(sizeof(THTensor)); THTensor_(rawInit)(self); THTensor_(resizeNd)(self, 4, size, NULL); return self; } THTensor *THTensor_(newClone)(THTensor *self) { THTensor *tensor = THTensor_(new)(); THTensor_(resizeAs)(tensor, self); THTensor_(copy)(tensor, self); return tensor; } THTensor *THTensor_(newContiguous)(THTensor *self) { if(!THTensor_(isContiguous)(self)) return THTensor_(newClone)(self); else { THTensor_(retain)(self); return self; } } THTensor *THTensor_(newSelect)(THTensor *tensor, int dimension_, long sliceIndex_) { THTensor *self = THTensor_(newWithTensor)(tensor); THTensor_(select)(self, NULL, dimension_, sliceIndex_); return self; } THTensor *THTensor_(newNarrow)(THTensor *tensor, int dimension_, long firstIndex_, long size_) { THTensor *self = THTensor_(newWithTensor)(tensor); THTensor_(narrow)(self, NULL, dimension_, firstIndex_, size_); return self; } THTensor *THTensor_(newTranspose)(THTensor *tensor, int dimension1_, int dimension2_) { THTensor *self = THTensor_(newWithTensor)(tensor); THTensor_(transpose)(self, NULL, dimension1_, dimension2_); return self; } THTensor *THTensor_(newUnfold)(THTensor *tensor, int dimension_, long size_, long step_) { THTensor *self = THTensor_(newWithTensor)(tensor); THTensor_(unfold)(self, NULL, dimension_, size_, step_); return self; } THTensor *THTensor_(newView)(THTensor *tensor, THLongStorage *size) { THArgCheck(THTensor_(isContiguous)(tensor), 1, "input is not contiguous"); ptrdiff_t numel = THTensor_(nElement)(tensor); 
THTensor *self = THTensor_(new)(); THLongStorage *inferred_size = THLongStorage_newInferSize(size, numel); THTensor_(setStorage)(self, tensor->storage, tensor->storageOffset, inferred_size, NULL); THLongStorage_free(inferred_size); return self; } /* Resize */ void THTensor_(resize)(THTensor *self, THLongStorage *size, THLongStorage *stride) { THArgCheck(size != NULL, 2, "invalid size"); if(stride) THArgCheck(stride->size == size->size, 3, "invalid stride"); #ifdef DEBUG THAssert(size->size <= INT_MAX); #endif THTensor_(resizeNd)(self, size->size, size->data, (stride ? stride->data : NULL)); } void THTensor_(resizeAs)(THTensor *self, THTensor *src) { if(!THTensor_(isSameSizeAs)(self, src)) THTensor_(resizeNd)(self, src->nDimension, src->size, NULL); } void THTensor_(resize1d)(THTensor *tensor, long size0) { THTensor_(resize4d)(tensor, size0, -1, -1, -1); } void THTensor_(resize2d)(THTensor *tensor, long size0, long size1) { THTensor_(resize4d)(tensor, size0, size1, -1, -1); } void THTensor_(resize3d)(THTensor *tensor, long size0, long size1, long size2) { THTensor_(resize4d)(tensor, size0, size1, size2, -1); } void THTensor_(resize4d)(THTensor *self, long size0, long size1, long size2, long size3) { long size[4] = {size0, size1, size2, size3}; THTensor_(resizeNd)(self, 4, size, NULL); } void THTensor_(resize5d)(THTensor *self, long size0, long size1, long size2, long size3, long size4) { long size[5] = {size0, size1, size2, size3, size4}; THTensor_(resizeNd)(self, 5, size, NULL); } THTensor* THTensor_(newExpand)(THTensor *tensor, THLongStorage *sizes) { THTensor *result = THTensor_(new)(); THTensor_(expand)(result, tensor, sizes); return result; } void THTensor_(expand)(THTensor *r, THTensor *tensor, THLongStorage *sizes) { THArgCheck(THTensor_(nDimension)(tensor) > 0, 0, "can't expand an empty tensor"); THArgCheck(THLongStorage_size(sizes) >= THTensor_(nDimension)(tensor), 1, "the number of sizes provided must be greater or equal to the " "number of dimensions in 
the tensor"); long *expandedSizes; long *expandedStrides; char error_buffer[1024]; int ret = THLongStorage_inferExpandGeometry(tensor->size, tensor->stride, THTensor_(nDimension)(tensor), sizes, &expandedSizes, &expandedStrides, error_buffer, 1024); if (ret != 0) { THError(error_buffer); return; } THTensor_(setStorageNd)(r, THTensor_(storage)(tensor), THTensor_(storageOffset)(tensor), THLongStorage_size(sizes), expandedSizes, expandedStrides); THFree(expandedSizes); THFree(expandedStrides); } void THTensor_(expandNd)(THTensor **rets, THTensor **ops, int count) { for (int i = 0; i < count; ++i) { THArgCheck(THTensor_(nDimension)(ops[i]) > 0, i, "can't expand empty tensor %d", i); } long **op_sizes = THAlloc(sizeof(long*) * count); long *op_dims = THAlloc(sizeof(long) * count); for (int i = 0; i < count; ++i) { op_sizes[i] = ops[i]->size; op_dims[i] = ops[i]->nDimension; } THLongStorage *sizes = THLongStorage_new(); char error_buffer[1024]; int ret = THLongStorage_inferSizeN(sizes, count, op_sizes, op_dims, error_buffer, 1024); if(ret != 0) { THFree(op_sizes); THFree(op_dims); THLongStorage_free(sizes); THError(error_buffer); return; } for (int i = 0; i < count; ++i) { THTensor_(expand)(rets[i], ops[i], sizes); } THFree(op_sizes); THFree(op_dims); THLongStorage_free(sizes); } void THTensor_(set)(THTensor *self, THTensor *src) { if(self != src) THTensor_(setStorageNd)(self, src->storage, src->storageOffset, src->nDimension, src->size, src->stride); } void THTensor_(setStorage)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, THLongStorage *size_, THLongStorage *stride_) { if(size_ && stride_) THArgCheck(size_->size == stride_->size, 5, "inconsistent size/stride sizes"); #ifdef DEBUG THAssert((size_ ? size_->size : (stride_ ? stride_->size : 0)) <= INT_MAX); #endif THTensor_(setStorageNd)(self, storage_, storageOffset_, (size_ ? size_->size : (stride_ ? stride_->size : 0)), (size_ ? size_->data : NULL), (stride_ ? 
stride_->data : NULL)); } void THTensor_(setStorage1d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, long size0_, long stride0_) { THTensor_(setStorage4d)(self, storage_, storageOffset_, size0_, stride0_, -1, -1, -1, -1, -1, -1); } void THTensor_(setStorage2d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, long size0_, long stride0_, long size1_, long stride1_) { THTensor_(setStorage4d)(self, storage_, storageOffset_, size0_, stride0_, size1_, stride1_, -1, -1, -1, -1); } void THTensor_(setStorage3d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, long size0_, long stride0_, long size1_, long stride1_, long size2_, long stride2_) { THTensor_(setStorage4d)(self, storage_, storageOffset_, size0_, stride0_, size1_, stride1_, size2_, stride2_, -1, -1); } void THTensor_(setStorage4d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, long size0_, long stride0_, long size1_, long stride1_, long size2_, long stride2_, long size3_, long stride3_) { long size[4] = {size0_, size1_, size2_, size3_}; long stride[4] = {stride0_, stride1_, stride2_, stride3_}; THTensor_(setStorageNd)(self, storage_, storageOffset_, 4, size, stride); } void THTensor_(narrow)(THTensor *self, THTensor *src, int dimension, long firstIndex, long size) { if(!src) src = self; THArgCheck( (dimension >= 0) && (dimension < src->nDimension), 2, "out of range"); THArgCheck( (firstIndex >= 0) && (firstIndex < src->size[dimension]), 3, "out of range"); THArgCheck( (size > 0) && (firstIndex <= src->size[dimension] - size), 4, "out of range"); THTensor_(set)(self, src); if(firstIndex > 0) self->storageOffset += firstIndex*self->stride[dimension]; self->size[dimension] = size; } void THTensor_(select)(THTensor *self, THTensor *src, int dimension, long sliceIndex) { int d; if(!src) src = self; THArgCheck(src->nDimension > 1, 1, "cannot select on a vector"); THArgCheck((dimension >= 0) && (dimension < src->nDimension), 2, "out of range"); 
THArgCheck((sliceIndex >= 0) && (sliceIndex < src->size[dimension]), 3, "out of range"); THTensor_(set)(self, src); THTensor_(narrow)(self, NULL, dimension, sliceIndex, 1); for(d = dimension; d < self->nDimension-1; d++) { self->size[d] = self->size[d+1]; self->stride[d] = self->stride[d+1]; } self->nDimension--; } void THTensor_(transpose)(THTensor *self, THTensor *src, int dimension1, int dimension2) { long z; if(!src) src = self; THArgCheck( (dimension1 >= 0) && (dimension1 < src->nDimension), 1, "out of range"); THArgCheck( (dimension2 >= 0) && (dimension2 < src->nDimension), 2, "out of range"); THTensor_(set)(self, src); if(dimension1 == dimension2) return; z = self->stride[dimension1]; self->stride[dimension1] = self->stride[dimension2]; self->stride[dimension2] = z; z = self->size[dimension1]; self->size[dimension1] = self->size[dimension2]; self->size[dimension2] = z; } void THTensor_(unfold)(THTensor *self, THTensor *src, int dimension, long size, long step) { long *newSize; long *newStride; int d; if(!src) src = self; THArgCheck( (src->nDimension > 0), 1, "cannot unfold an empty tensor"); THArgCheck((dimension >= 0) && (dimension < src->nDimension), 2, "out of range"); THArgCheck(size <= src->size[dimension], 3, "out of range"); THArgCheck(step > 0, 4, "invalid step"); THTensor_(set)(self, src); newSize = THAlloc(sizeof(long)*(self->nDimension+1)); newStride = THAlloc(sizeof(long)*(self->nDimension+1)); newSize[self->nDimension] = size; newStride[self->nDimension] = self->stride[dimension]; for(d = 0; d < self->nDimension; d++) { if(d == dimension) { newSize[d] = (self->size[d] - size) / step + 1; newStride[d] = step*self->stride[d]; } else { newSize[d] = self->size[d]; newStride[d] = self->stride[d]; } } THFree(self->size); THFree(self->stride); self->size = newSize; self->stride = newStride; self->nDimension++; } /* we have to handle the case where the result is a number */ void THTensor_(squeeze)(THTensor *self, THTensor *src) { int ndim = 0; int d; 
if(!src) src = self; THTensor_(set)(self, src); for(d = 0; d < src->nDimension; d++) { if(src->size[d] != 1) { if(d != ndim) { self->size[ndim] = src->size[d]; self->stride[ndim] = src->stride[d]; } ndim++; } } /* right now, we do not handle 0-dimension tensors */ if(ndim == 0 && src->nDimension > 0) { self->size[0] = 1; self->stride[0] = 1; ndim = 1; } self->nDimension = ndim; } void THTensor_(squeeze1d)(THTensor *self, THTensor *src, int dimension) { int d; if(!src) src = self; THArgCheck((dimension >= 0) && (dimension < src->nDimension), 2, "dimension out of range"); THTensor_(set)(self, src); if(src->size[dimension] == 1 && src->nDimension > 1) { for(d = dimension; d < self->nDimension-1; d++) { self->size[d] = self->size[d+1]; self->stride[d] = self->stride[d+1]; } self->nDimension--; } } void THTensor_(unsqueeze1d)(THTensor *self, THTensor *src, int dimension) { int d; if(!src) src = self; THArgCheck((dimension >= 0) && (dimension <= src->nDimension), 2, "dimension out of range"); THArgCheck(src->nDimension > 0, 2, "cannot unsqueeze empty tensor"); THTensor_(set)(self, src); self->size = (long*)THRealloc(self->size, sizeof(long)*(self->nDimension+1)); self->stride = (long*)THRealloc(self->stride, sizeof(long)*(self->nDimension+1)); self->nDimension++; for (d = self->nDimension-1; d > dimension; d--) { self->size[d] = self->size[d-1]; self->stride[d] = self->stride[d-1]; } if (dimension+1 < self->nDimension) { self->stride[dimension] = self->size[dimension+1] * self->stride[dimension+1]; } else { self->stride[dimension] = 1; } self->size[dimension] = 1; } int THTensor_(isTransposed)(const THTensor *self) { if (THTensor_(isContiguous)(self)) { return 0; } long max_stride = 1; long size_max_stride = 1; long z = 1; int d; for (d = 0; d < self->nDimension; ++d) { if (self->stride[d] == 0 && self->size[d] != 1) return 0; if (self->stride[d] > max_stride) { max_stride = self->stride[d]; size_max_stride = self->size[d]; } z *= self->size[d]; } if (z == max_stride * 
size_max_stride) { return 1; } return 0; } int THTensor_(isContiguous)(const THTensor *self) { long z = 1; int d; for(d = self->nDimension-1; d >= 0; d--) { if(self->size[d] != 1) { if(self->stride[d] == z) z *= self->size[d]; else return 0; } } return 1; } int THTensor_(isSize)(const THTensor *self, const THLongStorage *dims) { int d; if (self->nDimension != dims->size) return 0; for(d = 0; d < self->nDimension; ++d) { if(self->size[d] != dims->data[d]) return 0; } return 1; } int THTensor_(isSameSizeAs)(const THTensor *self, const THTensor* src) { int d; if (self->nDimension != src->nDimension) return 0; for(d = 0; d < self->nDimension; ++d) { if(self->size[d] != src->size[d]) return 0; } return 1; } int THTensor_(isSetTo)(const THTensor *self, const THTensor* src) { if (!self->storage) return 0; if (self->storage == src->storage && self->storageOffset == src->storageOffset && self->nDimension == src->nDimension) { int d; for (d = 0; d < self->nDimension; ++d) { if (self->size[d] != src->size[d] || self->stride[d] != src->stride[d]) return 0; } return 1; } return 0; } ptrdiff_t THTensor_(nElement)(const THTensor *self) { if(self->nDimension == 0) return 0; else { ptrdiff_t nElement = 1; int d; for(d = 0; d < self->nDimension; d++) nElement *= self->size[d]; return nElement; } } void THTensor_(retain)(THTensor *self) { if(self->flag & TH_TENSOR_REFCOUNTED) THAtomicIncrementRef(&self->refcount); } void THTensor_(free)(THTensor *self) { if(!self) return; if(self->flag & TH_TENSOR_REFCOUNTED) { if(THAtomicDecrementRef(&self->refcount)) { THFree(self->size); THFree(self->stride); if(self->storage) THStorage_(free)(self->storage); THFree(self); } } } void THTensor_(freeCopyTo)(THTensor *self, THTensor *dst) { if(self != dst) THTensor_(copy)(dst, self); THTensor_(free)(self); } /*******************************************************************************/ static void THTensor_(rawInit)(THTensor *self) { self->refcount = 1; self->storage = NULL; self->storageOffset = 
0; self->size = NULL; self->stride = NULL; self->nDimension = 0; self->flag = TH_TENSOR_REFCOUNTED; } void THTensor_(setStorageNd)(THTensor *self, THStorage *storage, ptrdiff_t storageOffset, int nDimension, long *size, long *stride) { /* storage */ if(self->storage != storage) { if(self->storage) THStorage_(free)(self->storage); if(storage) { self->storage = storage; THStorage_(retain)(self->storage); } else self->storage = NULL; } /* storageOffset */ if(storageOffset < 0) THError("Tensor: invalid storage offset"); self->storageOffset = storageOffset; /* size and stride */ THTensor_(resizeNd)(self, nDimension, size, stride); } void THTensor_(resizeNd)(THTensor *self, int nDimension, long *size, long *stride) { int d; int nDimension_; ptrdiff_t totalSize; int hascorrectsize = 1; nDimension_ = 0; for(d = 0; d < nDimension; d++) { if(size[d] > 0) { nDimension_++; if((self->nDimension > d) && (size[d] != self->size[d])) hascorrectsize = 0; if((self->nDimension > d) && stride && (stride[d] >= 0) && (stride[d] != self->stride[d])) hascorrectsize = 0; } else break; } nDimension = nDimension_; if(nDimension != self->nDimension) hascorrectsize = 0; if(hascorrectsize) return; if(nDimension > 0) { if(nDimension != self->nDimension) { self->size = THRealloc(self->size, sizeof(long)*nDimension); self->stride = THRealloc(self->stride, sizeof(long)*nDimension); self->nDimension = nDimension; } totalSize = 1; for(d = self->nDimension-1; d >= 0; d--) { self->size[d] = size[d]; if(stride && (stride[d] >= 0) ) self->stride[d] = stride[d]; else { if(d == self->nDimension-1) self->stride[d] = 1; else self->stride[d] = self->size[d+1]*self->stride[d+1]; } totalSize += (self->size[d]-1)*self->stride[d]; } if(totalSize+self->storageOffset > 0) { if(!self->storage) self->storage = THStorage_(new)(); if(totalSize+self->storageOffset > self->storage->size) THStorage_(resize)(self->storage, totalSize+self->storageOffset); } } else self->nDimension = 0; } void THTensor_(set1d)(THTensor 
*tensor, long x0, real value) { THArgCheck(tensor->nDimension == 1, 1, "tensor must have one dimension"); THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]), 2, "out of range"); THStorage_(set)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0], value); } real THTensor_(get1d)(const THTensor *tensor, long x0) { THArgCheck(tensor->nDimension == 1, 1, "tensor must have one dimension"); THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]), 2, "out of range"); return THStorage_(get)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]); } void THTensor_(set2d)(THTensor *tensor, long x0, long x1, real value) { THArgCheck(tensor->nDimension == 2, 1, "tensor must have two dimensions"); THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]), 2, "out of range"); THStorage_(set)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1], value); } real THTensor_(get2d)(const THTensor *tensor, long x0, long x1) { THArgCheck(tensor->nDimension == 2, 1, "tensor must have two dimensions"); THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]), 2, "out of range"); return THStorage_(get)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]); } void THTensor_(set3d)(THTensor *tensor, long x0, long x1, long x2, real value) { THArgCheck(tensor->nDimension == 3, 1, "tensor must have three dimensions"); THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]), 2, "out of range"); THStorage_(set)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2], value); } real THTensor_(get3d)(const THTensor *tensor, long x0, long x1, long x2) { THArgCheck(tensor->nDimension == 3, 1, "tensor must have three dimensions"); THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]), 2, "out of range"); 
return THStorage_(get)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]); } void THTensor_(set4d)(THTensor *tensor, long x0, long x1, long x2, long x3, real value) { THArgCheck(tensor->nDimension == 4, 1, "tensor must have four dimensions"); THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]) && (x3 >= 0) && (x3 < tensor->size[3]), 2, "out of range"); THStorage_(set)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]+x3*tensor->stride[3], value); } real THTensor_(get4d)(const THTensor *tensor, long x0, long x1, long x2, long x3) { THArgCheck(tensor->nDimension == 4, 1, "tensor must have four dimensions"); THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]) && (x3 >= 0) && (x3 < tensor->size[3]), 2, "out of range"); return THStorage_(get)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]+x3*tensor->stride[3]); } THDescBuff THTensor_(desc)(const THTensor *tensor) { const int L = TH_DESC_BUFF_LEN; THDescBuff buf; char *str = buf.str; int n = 0; #define _stringify(x) #x n += snprintf(str, L-n, "torch." 
_stringify(x) "Tensor of size "); #undef _stringify int i; for(i = 0; i < tensor->nDimension; i++) { if(n >= L) break; n += snprintf(str+n, L-n, "%ld", tensor->size[i]); if(i < tensor->nDimension-1) { n += snprintf(str+n, L-n, "x"); } } if(n >= L) { snprintf(str+L-4, 4, "..."); } return buf; } THDescBuff THTensor_(sizeDesc)(const THTensor *tensor) { THLongStorage *size = THTensor_(newSizeOf)((THTensor*)tensor); THDescBuff buf = THLongStorage_sizeDesc(size); THLongStorage_free(size); return buf; } #endif lib/TH/generic/THTensor.h000066400000000000000000000166031316246254300154460ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THTensor.h" #else /* a la lua? dim, storageoffset, ... et les methodes ? */ #define TH_TENSOR_REFCOUNTED 1 typedef struct THTensor { long *size; long *stride; int nDimension; THStorage *storage; ptrdiff_t storageOffset; int refcount; char flag; } THTensor; /**** access methods ****/ TH_API THStorage* THTensor_(storage)(const THTensor *self); TH_API ptrdiff_t THTensor_(storageOffset)(const THTensor *self); TH_API int THTensor_(nDimension)(const THTensor *self); TH_API long THTensor_(size)(const THTensor *self, int dim); TH_API long THTensor_(stride)(const THTensor *self, int dim); TH_API THLongStorage *THTensor_(newSizeOf)(THTensor *self); TH_API THLongStorage *THTensor_(newStrideOf)(THTensor *self); TH_API real *THTensor_(data)(const THTensor *self); TH_API void THTensor_(setFlag)(THTensor *self, const char flag); TH_API void THTensor_(clearFlag)(THTensor *self, const char flag); /**** creation methods ****/ TH_API THTensor *THTensor_(new)(void); TH_API THTensor *THTensor_(newWithTensor)(THTensor *tensor); /* stride might be NULL */ TH_API THTensor *THTensor_(newWithStorage)(THStorage *storage_, ptrdiff_t storageOffset_, THLongStorage *size_, THLongStorage *stride_); TH_API THTensor *THTensor_(newWithStorage1d)(THStorage *storage_, ptrdiff_t storageOffset_, long size0_, long stride0_); TH_API THTensor 
*THTensor_(newWithStorage2d)(THStorage *storage_, ptrdiff_t storageOffset_, long size0_, long stride0_, long size1_, long stride1_); TH_API THTensor *THTensor_(newWithStorage3d)(THStorage *storage_, ptrdiff_t storageOffset_, long size0_, long stride0_, long size1_, long stride1_, long size2_, long stride2_); TH_API THTensor *THTensor_(newWithStorage4d)(THStorage *storage_, ptrdiff_t storageOffset_, long size0_, long stride0_, long size1_, long stride1_, long size2_, long stride2_, long size3_, long stride3_); /* stride might be NULL */ TH_API THTensor *THTensor_(newWithSize)(THLongStorage *size_, THLongStorage *stride_); TH_API THTensor *THTensor_(newWithSize1d)(long size0_); TH_API THTensor *THTensor_(newWithSize2d)(long size0_, long size1_); TH_API THTensor *THTensor_(newWithSize3d)(long size0_, long size1_, long size2_); TH_API THTensor *THTensor_(newWithSize4d)(long size0_, long size1_, long size2_, long size3_); TH_API THTensor *THTensor_(newClone)(THTensor *self); TH_API THTensor *THTensor_(newContiguous)(THTensor *tensor); TH_API THTensor *THTensor_(newSelect)(THTensor *tensor, int dimension_, long sliceIndex_); TH_API THTensor *THTensor_(newNarrow)(THTensor *tensor, int dimension_, long firstIndex_, long size_); TH_API THTensor *THTensor_(newTranspose)(THTensor *tensor, int dimension1_, int dimension2_); TH_API THTensor *THTensor_(newUnfold)(THTensor *tensor, int dimension_, long size_, long step_); TH_API THTensor *THTensor_(newView)(THTensor *tensor, THLongStorage *size); TH_API THTensor *THTensor_(newExpand)(THTensor *tensor, THLongStorage *size); TH_API void THTensor_(expand)(THTensor *r, THTensor *tensor, THLongStorage *size); TH_API void THTensor_(expandNd)(THTensor **rets, THTensor **ops, int count); TH_API void THTensor_(resize)(THTensor *tensor, THLongStorage *size, THLongStorage *stride); TH_API void THTensor_(resizeAs)(THTensor *tensor, THTensor *src); TH_API void THTensor_(resizeNd)(THTensor *tensor, int nDimension, long *size, long *stride); 
TH_API void THTensor_(resize1d)(THTensor *tensor, long size0_); TH_API void THTensor_(resize2d)(THTensor *tensor, long size0_, long size1_); TH_API void THTensor_(resize3d)(THTensor *tensor, long size0_, long size1_, long size2_); TH_API void THTensor_(resize4d)(THTensor *tensor, long size0_, long size1_, long size2_, long size3_); TH_API void THTensor_(resize5d)(THTensor *tensor, long size0_, long size1_, long size2_, long size3_, long size4_); TH_API void THTensor_(set)(THTensor *self, THTensor *src); TH_API void THTensor_(setStorage)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, THLongStorage *size_, THLongStorage *stride_); TH_API void THTensor_(setStorageNd)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, int nDimension, long *size, long *stride); TH_API void THTensor_(setStorage1d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, long size0_, long stride0_); TH_API void THTensor_(setStorage2d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, long size0_, long stride0_, long size1_, long stride1_); TH_API void THTensor_(setStorage3d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, long size0_, long stride0_, long size1_, long stride1_, long size2_, long stride2_); TH_API void THTensor_(setStorage4d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, long size0_, long stride0_, long size1_, long stride1_, long size2_, long stride2_, long size3_, long stride3_); TH_API void THTensor_(narrow)(THTensor *self, THTensor *src, int dimension_, long firstIndex_, long size_); TH_API void THTensor_(select)(THTensor *self, THTensor *src, int dimension_, long sliceIndex_); TH_API void THTensor_(transpose)(THTensor *self, THTensor *src, int dimension1_, int dimension2_); TH_API void THTensor_(unfold)(THTensor *self, THTensor *src, int dimension_, long size_, long step_); TH_API void THTensor_(squeeze)(THTensor *self, THTensor *src); TH_API void THTensor_(squeeze1d)(THTensor *self, 
THTensor *src, int dimension_); TH_API void THTensor_(unsqueeze1d)(THTensor *self, THTensor *src, int dimension_); TH_API int THTensor_(isContiguous)(const THTensor *self); TH_API int THTensor_(isSameSizeAs)(const THTensor *self, const THTensor *src); TH_API int THTensor_(isSetTo)(const THTensor *self, const THTensor *src); TH_API int THTensor_(isSize)(const THTensor *self, const THLongStorage *dims); TH_API ptrdiff_t THTensor_(nElement)(const THTensor *self); TH_API void THTensor_(retain)(THTensor *self); TH_API void THTensor_(free)(THTensor *self); TH_API void THTensor_(freeCopyTo)(THTensor *self, THTensor *dst); /* Slow access methods [check everything] */ TH_API void THTensor_(set1d)(THTensor *tensor, long x0, real value); TH_API void THTensor_(set2d)(THTensor *tensor, long x0, long x1, real value); TH_API void THTensor_(set3d)(THTensor *tensor, long x0, long x1, long x2, real value); TH_API void THTensor_(set4d)(THTensor *tensor, long x0, long x1, long x2, long x3, real value); TH_API real THTensor_(get1d)(const THTensor *tensor, long x0); TH_API real THTensor_(get2d)(const THTensor *tensor, long x0, long x1); TH_API real THTensor_(get3d)(const THTensor *tensor, long x0, long x1, long x2); TH_API real THTensor_(get4d)(const THTensor *tensor, long x0, long x1, long x2, long x3); /* Debug methods */ TH_API THDescBuff THTensor_(desc)(const THTensor *tensor); TH_API THDescBuff THTensor_(sizeDesc)(const THTensor *tensor); #endif lib/TH/generic/THTensorConv.c000066400000000000000000001751631316246254300162760ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THTensorConv.c" #else /* 2D Input, 2D kernel : convolve given image with the given kernel. 
*/ void THTensor_(validXCorr2Dptr)(real *r_, real alpha, real *t_, long ir, long ic, real *k_, long kr, long kc, long sr, long sc) { long or = (ir - kr) / sr + 1; long oc = (ic - kc) / sc + 1; long xx, yy, kx, ky; if ((sc != 1) || (oc < 4)) { /* regular convolution */ for(yy = 0; yy < or; yy++) { for(xx = 0; xx < oc; xx++) { /* Dot product in two dimensions... (between input image and the mask) */ real *pi_ = t_ + yy*sr*ic + xx*sc; real *pw_ = k_; real sum = 0; for(ky = 0; ky < kr; ky++) { for(kx = 0; kx < kc; kx++) { sum += pi_[kx]*pw_[kx]; } pi_ += ic; /* next input line */ pw_ += kc; /* next mask line */ } /* Update output */ *r_++ += alpha*sum; } } } else { /* SSE-based convolution */ for(yy = 0; yy < or; yy++) { real *pi_ = t_ + yy*sr*ic; real *pw_ = k_; for (ky = 0; ky < kr; ky++) { real *pis_ = pi_; for (kx = 0; kx < kc; kx++) { THVector_(cadd)(r_, r_, pis_, alpha*pw_[kx], oc); pis_++; } pi_ += ic; /* next input line */ pw_ += kc; /* next mask line */ } r_ += oc; } } } /* 2D Input, 2D kernel : convolve given image with the given kernel. */ void THTensor_(validConv2Dptr)(real *r_, real alpha, real *t_, long ir, long ic, real *k_, long kr, long kc, long sr, long sc) { long or = (ir - kr) / sr + 1; long oc = (ic - kc) / sc + 1; long xx, yy, kx, ky; if ((sc != 1) || (oc < 4)) { /* regular convolution */ for(yy = 0; yy < or; yy++) { for(xx = 0; xx < oc; xx++) { /* Dot product in two dimensions... 
(between input image and the mask) */ real *pi_ = t_ + yy*sr*ic + xx*sc; real *pw_ = k_ + kr*kc - 1; real sum = 0; for(ky = 0; ky < kr; ky++) { for(kx = 0; kx < kc; kx++) { sum += pi_[kx]*pw_[-kx]; } pi_ += ic; /* next input line */ pw_ -= kc; /* next mask line */ } /* Update output */ *r_++ += alpha*sum; } } } else { /* SSE-based convolution */ for(yy = 0; yy < or; yy++) { real *pw_ = k_ + kr*kc - 1; real *pi_ = t_ + yy*sr*ic; for (ky = 0; ky < kr; ky++) { real *pis_ = pi_; for (kx = 0; kx < kc; kx++) { THVector_(cadd)(r_, r_, pis_, alpha*pw_[-kx], oc); pis_++; } pi_ += ic; /* next input line */ pw_ -= kc; /* next mask line */ } r_ += oc; } } } /* 2D Input, 2D kernel : convolve given image with the given kernel, full convolution. */ void THTensor_(fullConv2Dptr)(real *r_, real alpha, real *t_, long ir, long ic, real *k_, long kr, long kc, long sr, long sc) { long oc = (ic - 1) * sc + kc; long xx, yy, kx, ky; if ((sc != 1) || (ic < 4)) { /* regular convolution */ for(yy = 0; yy < ir; yy++) { for(xx = 0; xx < ic; xx++) { /* Outer product in two dimensions... (between input image and the mask) */ real *po_ = r_ + yy*sr*oc + xx*sc; real *pw_ = k_; for(ky = 0; ky < kr; ky++) { real z = *t_ * alpha; for(kx = 0; kx < kc; kx++) { po_[kx] += z * pw_[kx]; } po_ += oc; /* next input line */ pw_ += kc; /* next mask line */ } t_++; } } } else { /* SSE-based convolution */ for(yy = 0; yy < ir; yy++) { real *po_ = r_ + yy*sr*oc; real *pw_ = k_; for (ky = 0; ky < kr; ky++) { real *pos_ = po_; for (kx = 0; kx < kc; kx++) { THVector_(cadd)(pos_, pos_, t_, alpha*pw_[kx], ic); pos_++; } po_ += oc; /* next input line */ pw_ += kc; /* next mask line */ } t_ += ic; } } } /* 2D Input, 2D kernel : convolve given image with the given kernel, full convolution. 
*/ void THTensor_(fullXCorr2Dptr)(real *r_, real alpha, real *t_, long ir, long ic, real *k_, long kr, long kc, long sr, long sc) { long oc = (ic - 1) * sc + kc; long xx, yy, kx, ky; if ((sc != 1) || (ic < 4)) { /* regular convolution */ for(yy = 0; yy < ir; yy++) { for(xx = 0; xx < ic; xx++) { /* Outer product in two dimensions... (between input image and the mask) */ real *po_ = r_ + yy*sr*oc + xx*sc; real *pw_ = k_ + kr*kc -1; long kx, ky; for(ky = 0; ky < kr; ky++) { real z = *t_ * alpha; for(kx = 0; kx < kc; kx++) { po_[kx] += z * pw_[-kx]; } po_ += oc; /* next input line */ pw_ -= kc; /* next mask line */ } t_++; } } } else { /* SSE-based convolution */ for(yy = 0; yy < ir; yy++) { real *po_ = r_ + yy*sr*oc; real *pw_ = k_ + kr*kc -1; for (ky = 0; ky < kr; ky++) { real *pos_ = po_; for (kx = 0; kx < kc; kx++) { THVector_(cadd)(pos_, pos_, t_, pw_[-kx]*alpha, ic); pos_++; } po_ += oc; /* next input line */ pw_ -= kc; /* next mask line */ } t_ += ic; } } } /* 2D Input, 2D kernel : convolve given image with the given kernel, valid convolution. 
for sr,sc=1 this is equivalent to validXCorr2Dptr, but otherwise it is useful for calculating derivatives wrt a kernel that is applied with stride sr,sc != 1 */ void THTensor_(validXCorr2DRevptr)(real *r_, real alpha, real *t_, long ir, long ic, real *k_, long kr, long kc, long sr, long sc) { long or = ir - (kr - 1) * sr; long oc = ic - (kc - 1) * sc; long xx, yy, kx, ky; if ((sc != 1) || (kc < 4)) { /* regular convolution */ for(yy = 0; yy < kr; yy++) { for(xx = 0; xx < kc; xx++) { real *po_ = r_; real *pi_ = t_ + yy*sr*ic + xx*sc; real z = *k_++ * alpha; for(ky = 0; ky < or; ky++) { for(kx = 0; kx < oc; kx++) po_[kx] += z * pi_[kx]; pi_ += ic; po_ += oc; } } } } else { /* SSE-based convolution */ for(yy = 0; yy < kr; yy++) { for(xx = 0; xx < kc; xx++) { real *po_ = r_; real *pi_ = t_ + yy*sr*ic + xx*sc; real z = *k_++ * alpha; for(ky = 0; ky < or; ky++) { THVector_(cadd)(po_, po_, pi_, z, oc); pi_ += ic; po_ += oc; } } } } } /* 3D Input, 3D kernel : convolve given volume with the given kernel. */ void THTensor_(validXCorr3Dptr)(real *r_, real alpha, real *t_, long it, long ir, long ic, real *k_, long kt, long kr, long kc, long st, long sr, long sc) { long ot = (it - kt) / st + 1; long or = (ir - kr) / sr + 1; long oc = (ic - kc) / sc + 1; long zz, xx, yy; for (zz = 0; zz < ot; zz++) { for(yy = 0; yy < or; yy++) { for(xx = 0; xx < oc; xx++) { /* Dot product in two dimensions... (between input image and the mask) */ real *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc; real *pw_ = k_; real sum = 0; long kz, kx, ky; for(kz = 0; kz < kt; kz++) { for(ky = 0; ky < kr; ky++) { for(kx = 0; kx < kc; kx++) { sum += pi_[kx]*pw_[kx]; } pi_ += ic; /* next input line */ pw_ += kc; /* next mask line */ } pi_ += (ir-kr)*ic; /* next input slice */ } /* Update output */ *r_++ += sum*alpha; } } } } /* 3D Input, 3D kernel : convolve given volume with the given kernel. 
*/ void THTensor_(validConv3Dptr)(real *r_, real alpha, real *t_, long it, long ir, long ic, real *k_, long kt, long kr, long kc, long st, long sr, long sc) { long ot = (it - kt) / st + 1; long or = (ir - kr) / sr + 1; long oc = (ic - kc) / sc + 1; long zz, xx, yy; for(zz = 0; zz < ot; zz++) { for(yy = 0; yy < or; yy++) { for(xx = 0; xx < oc; xx++) { /* Dot product in two dimensions... (between input image and the mask) */ real *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc; real *pw_ = k_ + kt*kr*kc - 1; real sum = 0; long kz, kx, ky; for(kz = 0; kz < kt; kz++) { for(ky = 0; ky < kr; ky++) { for(kx = 0; kx < kc; kx++) { sum += pi_[kx]*pw_[-kx]; } pi_ += ic; /* next input line */ pw_ -= kc; /* next mask line */ } pi_ += (ir-kr)*ic; /* next input slice */ } /* Update output */ *r_++ += alpha*sum; } } } } /* 3D Input, 3D kernel : convolve given volume with the given kernel, full convolution. */ void THTensor_(fullConv3Dptr)(real *r_, real alpha, real *t_, long it, long ir, long ic, real *k_, long kt, long kr, long kc, long st, long sr, long sc) { long or = (ir - 1) * sr + kr; long oc = (ic - 1) * sc + kc; long zz, xx, yy; for(zz = 0; zz < it; zz++) { for(yy = 0; yy < ir; yy++) { for(xx = 0; xx < ic; xx++) { /* Outer product in two dimensions... (between input image and the mask) */ real *po_ = r_ + zz*st*or*oc + yy*sr*oc + xx*sc; real *pw_ = k_; long kz, kx, ky; /* printf("Output Plane : %ld,%ld,%ld, input val=%g\n",zz,yy,xx,*t_); */ for(kz = 0; kz < kt; kz++) { for(ky = 0; ky < kr; ky++) { real z = *t_ * alpha; for(kx = 0; kx < kc; kx++) { /* printf("o=%g,k=%g," , po_[kx],pw_[kx]); */ po_[kx] += z * pw_[kx]; /* printf("o=%g " , po_[kx]); */ } /* printf("\n"); */ po_ += oc; /* next input line */ pw_ += kc; /* next mask line */ } po_ += (or-kr)*oc; /* next output slice */ /* printf("\n"); */ } t_++; } } } } /* 3D Input, 3D kernel : convolve given volume with the given kernel, full convolution. 
   Full cross-correlation: like fullConv3Dptr, but the kernel is accessed
   flipped (pw_ starts at the last element and walks backwards), which for
   scattering is the cross-correlation orientation.  Accumulates (+=) into r_.
*/
void THTensor_(fullXCorr3Dptr)(real *r_, real alpha, real *t_,
                               long it, long ir, long ic,
                               real *k_, long kt, long kr, long kc,
                               long st, long sr, long sc)
{
  long or = (ir - 1) * sr + kr;
  long oc = (ic - 1) * sc + kc;

  long zz, xx, yy;

  for(zz = 0; zz < it; zz++)
  {
    for(yy = 0; yy < ir; yy++)
    {
      for(xx = 0; xx < ic; xx++)
      {
        /* Outer product in two dimensions... (between input image and the mask) */
        real *po_ = r_ + zz*st*or*oc + yy*sr*oc + xx*sc;
        real *pw_ = k_ + kt*kr*kc -1;  /* last kernel element: flipped access */
        long kz, kx, ky;
        for(kz = 0; kz < kt; kz++)
        {
          for(ky = 0; ky < kr; ky++)
          {
            real z = *t_ * alpha;
            for(kx = 0; kx < kc; kx++) {
              po_[kx] += z * pw_[-kx];
            }
            po_ += oc; /* next input line */
            pw_ -= kc; /* next mask line */
          }
          po_ += (or-kr)*oc; /* next output slice */
        }
        t_++;
      }
    }
  }
}

/*
  3D Input, 3D kernel : convolve given image with the given kernel, valid convolution.
  for sr,sc=1 this is equivalent to validXCorr3Dptr, but otherwise it is useful for
  calculating derivatives wrt a kernel that is applied with stride sr,sc != 1
*/
void THTensor_(validXCorr3DRevptr)(real *r_, real alpha, real *t_,
                                   long it, long ir, long ic,
                                   real *k_, long kt, long kr, long kc,
                                   long st, long sr, long sc)
{
  /* 3D analogue of validXCorr2DRevptr: each of the kt*kr*kc kernel weights
     scatters a scaled (ot x or x oc) sub-volume of the input into the output. */
  long ot = it - (kt - 1) * st;
  long or = ir - (kr - 1) * sr;
  long oc = ic - (kc - 1) * sc;

  long zz, xx, yy;

  for(zz = 0; zz < kt; zz++)
  {
    for(yy = 0; yy < kr; yy++)
    {
      for(xx = 0; xx < kc; xx++)
      {
        real *po_ = r_;
        real *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc;
        real z = *k_++ * alpha;
        long kz, kx, ky;
        for(kz = 0; kz < ot; kz++)
        {
          for(ky = 0; ky < or; ky++)
          {
            for(kx = 0; kx < oc; kx++)
              po_[kx] += z * pi_[kx];
            pi_ += ic;
            po_ += oc;
          }
          pi_ += (ir-or)*ic; /* next input slice */
        }
      }
    }
  }
}

/*
  2D convolution dispatcher over raw pointers: selects one of the four
  full/valid x conv/xcorr 2D kernels based on vf ('F'ull / 'V'alid) and
  xc ('C'onvolution / 'X'-correlation).  Accumulates into output_data.
  NOTE(review): both THArgChecks report argument index 7 — pre-existing.
*/
void THTensor_(conv2d)(real* output_data,
                       real alpha,
                       real* ptr_input, long nInputRows, long nInputCols,
                       real* ptr_weight, long nKernelRows, long nKernelCols,
                       long srow, long scol,
                       const char *vf, const char *xc)
{
  THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can be 'V' or 'F'");
  THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can be 'X' or 'C'");
  if (*vf == 'F')
    if (*xc == 'X')
      THTensor_(fullXCorr2Dptr)(output_data,
                                alpha,
                                ptr_input,  nInputRows,  nInputCols,
                                ptr_weight, nKernelRows, nKernelCols,
                                srow, scol);
    else
      THTensor_(fullConv2Dptr)(output_data,
                               alpha,
                               ptr_input,  nInputRows,  nInputCols,
                               ptr_weight, nKernelRows, nKernelCols,
                               srow, scol);
  else
    if (*xc == 'X')
      THTensor_(validXCorr2Dptr)(output_data,
                                 alpha,
                                 ptr_input,  nInputRows,  nInputCols,
                                 ptr_weight, nKernelRows, nKernelCols,
                                 srow, scol);
    else
      THTensor_(validConv2Dptr)(output_data,
                                alpha,
                                ptr_input,  nInputRows,  nInputCols,
                                ptr_weight, nKernelRows, nKernelCols,
                                srow, scol);
}

/*
  3D convolution dispatcher over raw pointers: same vf/xc selection as conv2d,
  but over the four 3D kernels.  Accumulates into output_data.
*/
void THTensor_(conv3d)(real* output_data,
                       real alpha,
                       real* ptr_input, long nInputDepth, long nInputRows, long nInputCols,
                       real* ptr_weight, long nKernelDepth, long nKernelRows, long nKernelCols,
                       long sdepth, long srow, long scol,
                       const char *vf, const char *xc)
{
  THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can be 'V' or 'F'");
  THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can be 'X' or 'C'");
  if (*vf == 'F')
    if (*xc == 'X')
      THTensor_(fullXCorr3Dptr)(output_data,
                                alpha,
                                ptr_input, nInputDepth, nInputRows,  nInputCols,
                                ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
                                sdepth, srow, scol);
    else
      THTensor_(fullConv3Dptr)(output_data,
                               alpha,
                               ptr_input, nInputDepth, nInputRows,  nInputCols,
                               ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
                               sdepth, srow, scol);
  else
    if (*xc == 'X')
      THTensor_(validXCorr3Dptr)(output_data,
                                 alpha,
                                 ptr_input, nInputDepth, nInputRows,  nInputCols,
                                 ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
                                 sdepth, srow, scol);
    else
      THTensor_(validConv3Dptr)(output_data,
                                alpha,
                                ptr_input, nInputDepth, nInputRows,  nInputCols,
                                ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
                                sdepth, srow, scol);
}

/*
  Output extent of a 1D convolution along one dimension:
  'V'alid -> (x-k)/s + 1,  'F'ull -> (x-1)*s + k.
*/
long THTensor_(convsize)(long x, long k, long s, const char* vf)
{
  THArgCheck(*vf == 'V' || *vf == 'F', 1, "type of convolution can be 'V' or 'F'");
  if (*vf == 'V')
    return (x-k)/s + 1;
  else
    return (x-1)*s + k;
}
/* 3D input, 3D kernel, 4D output like rank1 update A <- xx' + beta*A for sr,sc=1 this is equivalent to conv2Dger, but otherwise it is useful for calculating derivatives wrt a kernel that is applied with stride sr,sc != 1 */ void THTensor_(conv2DRevger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol) { long nInputPlane, nInputRows, nInputCols; long nKernelPlane, nKernelRows, nKernelCols; long nOutputPlane, nOutputRows, nOutputCols; long istride0, kstride0; THTensor *input; THTensor *kernel; real *input_data; real *weight_data; real *output_data; ptrdiff_t nelem; long k; THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected"); THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); input = THTensor_(newContiguous)(t_); kernel = THTensor_(newContiguous)(k_); nInputPlane = input->size[0]; istride0 = input->stride[0]; nInputRows = input->size[1]; nInputCols = input->size[2]; kstride0 = kernel->stride[0]; nKernelPlane = kernel->size[0]; nKernelRows = kernel->size[1]; nKernelCols = kernel->size[2]; nOutputPlane = nInputPlane * kernel->size[0]; THArgCheck(nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2, "covn2DRevger : Input image is smaller than kernel"); nOutputRows = nInputRows - (nKernelRows - 1) * srow; nOutputCols = nInputCols - (nKernelCols - 1) * scol; nelem = THTensor_(nElement)(r_); THTensor_(resize4d)(r_,nKernelPlane, nInputPlane, nOutputRows, nOutputCols); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { /*THTensor_(zero)(r_);*/ #pragma omp parallel for private(k) for (k = 0; k < r_->size[0]*r_->size[1]; k++) { real* ptr_output = output_data + k*nOutputCols*nOutputRows; long l; for (l = 0; l < nOutputRows*nOutputCols; l++) ptr_output[l] 
= 0.0; } } else if (beta != 1) { /*THTensor_(mul)(r_, beta);*/ #pragma omp parallel for private(k) for (k = 0; k < r_->size[0]*r_->size[1]; k++) { real* ptr_output = output_data + k*nOutputCols*nOutputRows; long l; for (l = 0; l < nOutputRows*nOutputCols; l++) ptr_output[l] *= beta; } } #pragma omp parallel for private(k) for(k = 0; k < nKernelPlane; k++) { long i; /* get kernel */ real *ptr_weight = weight_data+k*kstride0; for(i = 0; i < nInputPlane; i++) { /* get output */ real *ptr_output = output_data + k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows; /* get input */ real *ptr_input = input_data+i*istride0; /* do image, kernel convolution */ THTensor_(validXCorr2DRevptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); /* Next output plane */ /* output_data += nOutputCols*nOutputRows; */ } } THTensor_(free)(input); THTensor_(free)(kernel); } /* 3D input, 3D kernel, 4D output like rank1 update A <- xx' + beta*A for sr,sc=1 this is equivalent to conv2Dger, but otherwise it is useful for calculating derivatives wrt a kernel that is applied with stride sr,sc != 1 */ void THTensor_(conv2DRevgerm)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol) { long nbatch, nInputPlane, nInputRows, nInputCols; long nKernelPlane, nKernelRows, nKernelCols; long nOutputRows, nOutputCols; long istride0, kstride0, istride1, kstride1; THTensor *input; THTensor *kernel; real *input_data; real *weight_data; real *output_data; ptrdiff_t nelem; long k; THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); input = THTensor_(newContiguous)(t_); kernel = THTensor_(newContiguous)(k_); istride0 = input->stride[0]; istride1 = input->stride[1]; nbatch = input->size[0]; nInputPlane = 
input->size[1]; nInputRows = input->size[2]; nInputCols = input->size[3]; kstride0 = kernel->stride[0]; kstride1 = kernel->stride[1]; nKernelPlane = kernel->size[1]; nKernelRows = kernel->size[2]; nKernelCols = kernel->size[3]; THArgCheck(nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2, "conv2DRevger : Input image is smaller than kernel"); THArgCheck(kernel->size[0] == input->size[0] , 2, "conv2DRevger : Input batch and kernel batch is not same size"); nOutputRows = nInputRows - (nKernelRows - 1) * srow; nOutputCols = nInputCols - (nKernelCols - 1) * scol; nelem = THTensor_(nElement)(r_); THTensor_(resize4d)(r_,nKernelPlane, nInputPlane, nOutputRows, nOutputCols); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { /*THTensor_(zero)(r_);*/ #pragma omp parallel for private(k) for (k = 0; k < r_->size[0]*r_->size[1]; k++) { real* ptr_output = output_data + k*nOutputCols*nOutputRows; long l; for (l = 0; l < nOutputRows*nOutputCols; l++) ptr_output[l] = 0.0; } } else if (beta != 1) { /*THTensor_(mul)(r_, beta);*/ #pragma omp parallel for private(k) for (k = 0; k < r_->size[0]*r_->size[1]; k++) { real* ptr_output = output_data + k*nOutputCols*nOutputRows; long l; for (l = 0; l < nOutputRows*nOutputCols; l++) ptr_output[l] *= beta; } } #pragma omp parallel for private(k) for(k = 0; k < nKernelPlane; k++) { long i; for(i = 0; i < nInputPlane; i++) { long p; for(p = 0; p < nbatch; p++) { /* get kernel */ real *ptr_weight = weight_data + p*kstride0 + k*kstride1; /* get output */ real *ptr_output = output_data + k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows; /* get input */ real *ptr_input = input_data + p*istride0 + i*istride1; /* do image, kernel convolution */ THTensor_(validXCorr2DRevptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); /* Next output plane */ 
/* output_data += nOutputCols*nOutputRows; */ } } } THTensor_(free)(input); THTensor_(free)(kernel); } /* 3D input, 3D kernel, 4D output like rank1 update A <- xx' + beta*A */ void THTensor_(conv2Dger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc) { long nInputPlane, nInputRows, nInputCols; long nKernelPlane, nKernelRows, nKernelCols; long nOutputPlane, nOutputRows, nOutputCols; long istride0, kstride0; THTensor *input; THTensor *kernel; real *input_data; real *weight_data; real *output_data; ptrdiff_t nelem; long k; THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected"); THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can 'V' or 'F'"); THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can 'X' or 'C'"); input = THTensor_(newContiguous)(t_); kernel = THTensor_(newContiguous)(k_); nInputPlane = input->size[0]; istride0 = input->stride[0]; nInputRows = input->size[1]; nInputCols = input->size[2]; kstride0 = kernel->stride[0]; nKernelPlane = kernel->size[0]; nKernelRows = kernel->size[1]; nKernelCols = kernel->size[2]; nOutputPlane = nInputPlane * kernel->size[0]; THArgCheck((nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dger : Input image is smaller than kernel"); if (*vf == 'F') { nOutputRows = (nInputRows - 1) * srow + nKernelRows; nOutputCols = (nInputCols - 1) * scol + nKernelCols; } else { /* valid */ nOutputRows = (nInputRows - nKernelRows) / srow + 1; nOutputCols = (nInputCols - nKernelCols) / scol + 1; } nelem = THTensor_(nElement)(r_); THTensor_(resize4d)(r_, nKernelPlane, nInputPlane, nOutputRows, nOutputCols); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); if 
(nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { /*THTensor_(zero)(r_);*/ #pragma omp parallel for private(k) for (k = 0; k < r_->size[0]*r_->size[1]; k++) { real* ptr_output = output_data + k*nOutputCols*nOutputRows; long l; for (l = 0; l < nOutputRows*nOutputCols; l++) ptr_output[l] = 0.0; } } else if (beta != 1) { /*THTensor_(mul)(r_, beta);*/ #pragma omp parallel for private(k) for (k = 0; k < r_->size[0]*r_->size[1]; k++) { real* ptr_output = output_data + k*nOutputCols*nOutputRows; long l; for (l = 0; l < nOutputRows*nOutputCols; l++) ptr_output[l] *= beta; } } #pragma omp parallel for private(k) for(k = 0; k < nKernelPlane; k++) { long i; /* get kernel */ real *ptr_weight = weight_data+k*kstride0; for(i = 0; i < nInputPlane; i++) { /* get output */ real *ptr_output = output_data + k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows; /* get input */ real *ptr_input = input_data+i*istride0; /* do image, kernel convolution */ if (*vf == 'F') if (*xc == 'X') THTensor_(fullXCorr2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else THTensor_(fullConv2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else if (*xc == 'X') THTensor_(validXCorr2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else THTensor_(validConv2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); /* Next output plane */ /* output_data += nOutputCols*nOutputRows; */ } } THTensor_(free)(input); THTensor_(free)(kernel); } /* 3D input, 4D kernel, 3D output matrix vector product like y <- Ax + beta*y */ void THTensor_(conv2Dmv)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc) { long nInputPlane, nInputRows, nInputCols; long nKernelRows, nKernelCols; long 
nOutputPlane, nOutputRows, nOutputCols; long istride0, kstride0, kstride1; THTensor *input; THTensor* kernel; real *input_data; real *weight_data; real *output_data; ptrdiff_t nelem; long k; THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can 'V' or 'F'"); THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can 'X' or 'C'"); input = THTensor_(newContiguous)(t_); if (!(k_->stride[3] == 1) || !(k_->stride[2] == k_->size[3])) { kernel = THTensor_(newContiguous)(k_); } else { THTensor_(retain)(k_); kernel = k_; } nInputPlane = input->size[0]; istride0 = input->stride[0]; nInputRows = input->size[1]; nInputCols = input->size[2]; kstride0 = kernel->stride[0]; kstride1 = kernel->stride[1]; nKernelRows = kernel->size[2]; nKernelCols = kernel->size[3]; nOutputPlane = kernel->size[0]; THArgCheck(kernel->size[1] == nInputPlane, 2, "invalid number of input planes"); THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dmv : Input image is smaller than kernel"); if (*vf == 'F') { nOutputRows = (nInputRows - 1) * srow + nKernelRows; nOutputCols = (nInputCols - 1) * scol + nKernelCols; } else { /* valid */ nOutputRows = (nInputRows - nKernelRows) / srow + 1; nOutputCols = (nInputCols - nKernelCols) / scol + 1; } nelem = THTensor_(nElement)(r_); THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { /*THTensor_(zero)(r_);*/ #pragma omp parallel for private(k) for (k = 0; k < r_->size[0]; k++) { real* ptr_output = output_data + k*nOutputCols*nOutputRows; long l; for (l = 0; l < 
nOutputRows*nOutputCols; l++) ptr_output[l] = 0.0; } } else if (beta != 1) { /*THTensor_(mul)(r_, beta);*/ #pragma omp parallel for private(k) for (k = 0; k < r_->size[0]; k++) { real* ptr_output = output_data + k*nOutputCols*nOutputRows; long l; for (l = 0; l < nOutputRows*nOutputCols; l++) ptr_output[l] *= beta; } } #pragma omp parallel for private(k) for(k = 0; k < nOutputPlane; k++) { long i; /* get output */ real *ptr_output = output_data + k*nOutputCols*nOutputRows; for(i = 0; i < nInputPlane; i++) { /* get kernel */ real *ptr_weight = weight_data + k*kstride0 + i*kstride1; /* get input */ real *ptr_input = input_data + i*istride0; /* do image, kernel convolution */ if (*vf == 'F') if (*xc == 'X') THTensor_(fullXCorr2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else THTensor_(fullConv2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else if (*xc == 'X') THTensor_(validXCorr2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else THTensor_(validConv2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); } /* Next output plane */ /* output_data += nOutputCols*nOutputRows;*/ } THTensor_(free)(input); THTensor_(free)(kernel); } /* 3D input, 4D kernel, 3D output matrix vector product like y <- Ax + beta*y */ void THTensor_(conv2Dmm)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc) { long nInputPlane, nInputRows, nInputCols; long nKernelRows, nKernelCols; long nOutputPlane, nOutputRows, nOutputCols; long kstride0, kstride1; THTensor *input; THTensor* kernel; long nbatch; ptrdiff_t nelem; real *input_data; real *weight_data; real *output_data; long p; THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); THArgCheck(k_->nDimension == 4 , 4, 
"kernel: 4D Tensor expected"); THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can 'V' or 'F'"); THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can 'X' or 'C'"); input = THTensor_(newContiguous)(t_); if (!(k_->stride[3] == 1) || !(k_->stride[2] == k_->size[3])) { kernel = THTensor_(newContiguous)(k_); } else { THTensor_(retain)(k_); kernel = k_; } nbatch = input->size[0]; nInputPlane = input->size[1]; nInputRows = input->size[2]; nInputCols = input->size[3]; kstride0 = kernel->stride[0]; kstride1 = kernel->stride[1]; nKernelRows = kernel->size[2]; nKernelCols = kernel->size[3]; nOutputPlane = kernel->size[0]; THArgCheck(kernel->size[1] == nInputPlane, 2, "invalid number of input planes"); THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dmv : Input image is smaller than kernel"); if (*vf == 'F') { nOutputRows = (nInputRows - 1) * srow + nKernelRows; nOutputCols = (nInputCols - 1) * scol + nKernelCols; } else { /* valid */ nOutputRows = (nInputRows - nKernelRows) / srow + 1; nOutputCols = (nInputCols - nKernelCols) / scol + 1; } nelem = THTensor_(nElement)(r_); THTensor_(resize4d)(r_, nbatch, nOutputPlane, nOutputRows, nOutputCols); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { /*THTensor_(zero)(r_);*/ #pragma omp parallel for private(p) for (p=0; p < r_->size[0]; p++) { long k; for (k = 0; k < r_->size[1]; k++) { real* ptr_output = output_data + p*nOutputPlane*nOutputRows*nOutputCols + k*nOutputCols*nOutputRows; long l; for (l = 0; l < nOutputRows*nOutputCols; l++) ptr_output[l] = 0.0; } } } else if (beta != 1) { /*THTensor_(mul)(r_, beta);*/ #pragma omp parallel for private(p) for(p=0; p < r_->size[0]; p++) { long k; for (k = 0; k < 
r_->size[1]; k++) { real* ptr_output = output_data + p*nOutputPlane*nOutputRows*nOutputCols + k*nOutputCols*nOutputRows; long l; for (l = 0; l < nOutputRows*nOutputCols; l++) ptr_output[l] *= beta; } } } #pragma omp parallel for private(p) for(p=0; p < nbatch; p++) { long k; for(k = 0; k < nOutputPlane; k++) { long i; /* get output */ real *ptr_output = output_data + p*nOutputPlane*nOutputCols*nOutputRows + k*nOutputCols*nOutputRows; for(i = 0; i < nInputPlane; i++) { /* get kernel */ real *ptr_weight = weight_data + k*kstride0 + i*kstride1; /* get input */ real *ptr_input = input_data + p*nInputPlane*nInputRows*nInputCols + i*nInputRows*nInputCols; /* do image, kernel convolution */ if (*vf == 'F') if (*xc == 'X') THTensor_(fullXCorr2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else THTensor_(fullConv2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else if (*xc == 'X') THTensor_(validXCorr2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else THTensor_(validConv2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); } /* Next output plane */ /* output_data += nOutputCols*nOutputRows;*/ } } THTensor_(free)(input); THTensor_(free)(kernel); } /* 2D input, 2D kernel, 2D output scalar multiplication like y <- x*y + beta*y */ void THTensor_(conv2Dmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc) { THTensor *input; THTensor* kernel; long nInputRows; long nInputCols; long nKernelRows; long nKernelCols; long nOutputRows, nOutputCols; real *ptr_input; real *ptr_weight; real *output_data; ptrdiff_t nelem; THArgCheck(t_->nDimension == 2 , 3, "input: 2D Tensor expected"); THArgCheck(k_->nDimension == 2 , 4, "kernel: 2D Tensor expected"); THArgCheck(srow >= 1, 5, 
"Stride should be a positive integer"); THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); input = THTensor_(newContiguous)(t_); kernel = THTensor_(newContiguous)(k_); nInputRows = input->size[0]; nInputCols = input->size[1]; nKernelRows = kernel->size[0]; nKernelCols = kernel->size[1]; THArgCheck((nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dmul : Input image is smaller than kernel"); nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); nelem = THTensor_(nElement)(r_); THTensor_(resize2d)(r_, nOutputRows, nOutputCols); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) THTensor_(zero)(r_); else if (beta != 1) THTensor_(mul)(r_, r_, beta); ptr_input = THTensor_(data)(input); ptr_weight = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); /* do image, kernel convolution */ THTensor_(conv2d)(output_data, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol, vf, xc); THTensor_(free)(input); THTensor_(free)(kernel); } /* 3D input, 3D kernel, 3D output component wise multiplication like y <- y.*x + beta*y */ void THTensor_(conv2Dcmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc) { long nInputPlane, nInputRows, nInputCols; long nKernelRows, nKernelCols; long nOutputPlane, nOutputRows, nOutputCols; long istride0, kstride0; THTensor *input; THTensor *kernel; real *input_data; real *weight_data; real *output_data; ptrdiff_t nelem; long k; THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected"); THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); input = THTensor_(newContiguous)(t_); kernel = THTensor_(newContiguous)(k_); istride0 = input->stride[0]; 
nInputPlane = input->size[0]; nInputRows = input->size[1]; nInputCols = input->size[2]; kstride0 = kernel->stride[0]; nOutputPlane = kernel->size[0]; nKernelRows = kernel->size[1]; nKernelCols = kernel->size[2]; THArgCheck(nOutputPlane == nInputPlane, 2, "invalid number of input/kernel planes"); THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dcmul : Input image is smaller than kernel"); nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); nelem = THTensor_(nElement)(r_); THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { THTensor_(zero)(r_); } else if (beta != 1) THTensor_(mul)(r_, r_, beta); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); for(k = 0; k < nOutputPlane; k++) { /* get kernel */ real *ptr_weight = weight_data + k*kstride0; /* get input */ real *ptr_input = input_data + k*istride0; /* do image, kernel convolution */ THTensor_(conv2d)(output_data, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol, vf, xc); /* Next output plane */ output_data += nOutputCols*nOutputRows; } THTensor_(free)(input); THTensor_(free)(kernel); } /* 3D input, 3D kernel, 3D output component wise multiplication like with a permutation map y <- y.*x + beta*y */ void THTensor_(conv2Dmap)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, THTensor *map, long srow, long scol, const char *vf, const char *xc) { long nInputPlane, nInputRows, nInputCols; long nKernelRows, nKernelCols; long nOutputPlane, nOutputRows, nOutputCols; long istride0, kstride0; THTensor *input; THTensor* kernel; real *input_data; real *weight_data; real *output_data; long nmaps; ptrdiff_t nelem; long k; THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); 
THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected"); THArgCheck(map->nDimension == 2 , 4, "map: 2D Tensor expected"); THArgCheck(srow >= 1, 6, "Stride should be a positive integer"); THArgCheck(scol >= 1, 7, "Stride should be a positive integer"); input = THTensor_(newContiguous)(t_); kernel = THTensor_(newContiguous)(k_); istride0 = input->stride[0]; nInputPlane = input->size[0]; nInputRows = input->size[1]; nInputCols = input->size[2]; kstride0 = kernel->stride[0]; nOutputPlane = kernel->size[0]; nKernelRows = kernel->size[1]; nKernelCols = kernel->size[2]; THArgCheck(nOutputPlane == nInputPlane, 2, "invalid number of input/kernel planes"); THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dmap : Input image is smaller than kernel"); nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); nelem = THTensor_(nElement)(r_); THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { THTensor_(zero)(r_); } else if (beta != 1) THTensor_(mul)(r_, r_, beta); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); nmaps = map->size[0]; for(k = 0; k < nmaps; k++) { /* get indices */ long from = (long)THTensor_(get2d)(map,k,0)-1; long to = (long)THTensor_(get2d)(map,k,1)-1; /* get kernel */ real *ptr_weight = weight_data + k*kstride0; /* get input */ real *ptr_input = input_data + from*istride0; /* get output */ real *ptr_output = output_data + to*nOutputRows*nOutputCols; /* do image, kernel convolution */ THTensor_(conv2d)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol, vf, xc); } THTensor_(free)(input); THTensor_(free)(kernel); } /* 4D input, 4D kernel, 5D output like rank1 update A <- xx' + beta*A for sr,sc=1 this is equivalent to xcorr2Dger, but 
otherwise it is useful for calculating derivatives wrt a kernel that is applied with
stride sr,sc != 1 */
void THTensor_(conv3DRevger)(THTensor *r_, real beta, real alpha,
                             THTensor *t_, THTensor *k_,
                             long sdepth, long srow, long scol)
{
  /* 3D analogue of conv2DRevger: r_ is resized to
     (nKernelPlane, nInputPlane, oD, oH, oW); each (kernel plane, input plane)
     pair is processed by validXCorr3DRevptr, with beta applied first. */
  long nInputPlane, nInputDepth, nInputRows, nInputCols;
  long nKernelPlane, nKernelDepth, nKernelRows, nKernelCols;
  long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols;
  long istride0, kstride0;
  THTensor *input;
  THTensor *kernel;
  real *input_data;
  real *weight_data;
  real *output_data;
  ptrdiff_t nelem;
  long k, i;

  THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected");
  THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected");
  THArgCheck(sdepth >= 1, 5, "Stride should be a positive integer");
  THArgCheck(srow >= 1, 6, "Stride should be a positive integer");
  THArgCheck(scol >= 1, 7, "Stride should be a positive integer");

  input = THTensor_(newContiguous)(t_);
  kernel = THTensor_(newContiguous)(k_);

  nInputPlane = input->size[0];
  istride0    = input->stride[0];
  nInputDepth = input->size[1];
  nInputRows  = input->size[2];
  nInputCols  = input->size[3];

  kstride0 = kernel->stride[0];
  nKernelPlane = kernel->size[0];
  nKernelDepth= kernel->size[1];
  nKernelRows = kernel->size[2];
  nKernelCols = kernel->size[3];
  nOutputPlane = nInputPlane * kernel->size[0];

  THArgCheck(nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2, "conv3DRevger : Input image is smaller than kernel");

  /* "rev" output extents: one (oD x oH x oW) block per kernel tap position */
  nOutputDepth = nInputDepth - (nKernelDepth - 1) * sdepth;
  nOutputRows = nInputRows - (nKernelRows - 1) * srow;
  nOutputCols = nInputCols - (nKernelCols - 1) * scol;

  nelem = THTensor_(nElement)(r_);
  THTensor_(resize5d)(r_,nKernelPlane, nInputPlane, nOutputDepth, nOutputRows, nOutputCols);

  /* beta handling: zero when result is fresh/resized, scale otherwise
     (serial here, unlike the OpenMP loops in the 2D variants) */
  if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
  {
    THTensor_(zero)(r_);
  }
  else if (beta != 1)
    THTensor_(mul)(r_, r_, beta);

  input_data = THTensor_(data)(input);
  weight_data = THTensor_(data)(kernel);
  output_data = THTensor_(data)(r_);

  for(k = 0; k < nKernelPlane; k++)
  {
    /* get kernel */
    real *ptr_weight = weight_data+k*kstride0;

    for(i = 0; i < nInputPlane; i++)
    {
      /* get input */
      real *ptr_input = input_data+i*istride0;

      /* do image, kernel convolution */
      THTensor_(validXCorr3DRevptr)(output_data,
                                    alpha,
                                    ptr_input,  nInputDepth, nInputRows,  nInputCols,
                                    ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
                                    sdepth, srow, scol);
      /* Next output plane */
      output_data += nOutputDepth*nOutputCols*nOutputRows;
    }
  }
  THTensor_(free)(input);
  THTensor_(free)(kernel);
}

/*
  4D input, 4D kernel, 5D output
  like rank1 update: A <- xx' + beta*A
  Full/valid and conv/xcorr selected by vf/xc, dispatched through conv3d.
*/
void THTensor_(conv3Dger)(THTensor *r_, real beta, real alpha,
                          THTensor *t_, THTensor *k_,
                          long sdepth, long srow, long scol,
                          const char *vf, const char *xc)
{
  long nInputPlane, nInputDepth, nInputRows, nInputCols;
  long nKernelPlane, nKernelDepth, nKernelRows, nKernelCols;
  long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols;
  long istride0, kstride0;
  THTensor *input;
  THTensor *kernel;
  real *input_data;
  real *weight_data;
  real *output_data;
  ptrdiff_t nelem;
  long k, i;

  THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected");
  THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected");
  THArgCheck(sdepth >= 1, 5, "Stride should be a positive integer");
  THArgCheck(srow >= 1, 6, "Stride should be a positive integer");
  THArgCheck(scol >= 1, 7, "Stride should be a positive integer");
  /* NOTE(review): messages below are missing "be" ("can be 'V' or 'F'") —
     left byte-identical in this documentation-only pass */
  THArgCheck(*vf == 'V' || *vf == 'F', 8, "type of convolution can 'V' or 'F'");
  THArgCheck(*xc == 'C' || *xc == 'X', 8, "type of convolution can 'X' or 'C'");

  input = THTensor_(newContiguous)(t_);
  kernel = THTensor_(newContiguous)(k_);

  nInputPlane = input->size[0];
  istride0    = input->stride[0];
  nInputDepth = input->size[1];
  nInputRows  = input->size[2];
  nInputCols  = input->size[3];

  kstride0 = kernel->stride[0];
  nKernelPlane = kernel->size[0];
  nKernelDepth = kernel->size[1];
  nKernelRows = kernel->size[2];
  nKernelCols = kernel->size[3];
  nOutputPlane = nInputPlane * kernel->size[0];

  THArgCheck((nInputDepth >= nKernelDepth
              && nInputRows >= nKernelRows
              && nInputCols >= nKernelCols)
             || *vf == 'F', 2, "conv3Dger : Input image is smaller than kernel");

  nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf);
  nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);
  nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);

  nelem = THTensor_(nElement)(r_);
  THTensor_(resize5d)(r_,nKernelPlane, nInputPlane, nOutputDepth, nOutputRows, nOutputCols);

  if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
  {
    THTensor_(zero)(r_);
  }
  else if (beta != 1)
    THTensor_(mul)(r_, r_, beta);

  input_data = THTensor_(data)(input);
  weight_data = THTensor_(data)(kernel);
  output_data = THTensor_(data)(r_);

  for(k = 0; k < nKernelPlane; k++)
  {
    /* get kernel */
    real *ptr_weight = weight_data+k*kstride0;

    for(i = 0; i < nInputPlane; i++)
    {
      /* get input */
      real *ptr_input = input_data+i*istride0;

      /* do image, kernel convolution */
      THTensor_(conv3d)(output_data,
                        alpha,
                        ptr_input,  nInputDepth, nInputRows,  nInputCols,
                        ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
                        sdepth, srow, scol, vf, xc);
      /* Next output plane */
      output_data += nOutputDepth*nOutputCols*nOutputRows;
    }
  }
  THTensor_(free)(input);
  THTensor_(free)(kernel);
}

/*
  4D input, 5D kernel, 4D output
  matrix vector product like: y <- Ax + beta*y
  Output plane k accumulates the 3D convolution of every input plane i with
  kernel slice (k,i).
*/
void THTensor_(conv3Dmv)(THTensor *r_, real beta, real alpha,
                         THTensor *t_, THTensor *k_,
                         long sdepth, long srow, long scol,
                         const char *vf, const char *xc)
{
  long nInputPlane, nInputDepth, nInputRows, nInputCols;
  long nKernelDepth, nKernelRows, nKernelCols;
  long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols;
  long istride0, kstride0, kstride1;
  THTensor *input;
  THTensor *kernel;
  real *input_data;
  real *weight_data;
  real *output_data;
  ptrdiff_t nelem;
  long k, i;

  THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected");
  THArgCheck(k_->nDimension == 5 , 4, "kernel: 5D Tensor expected");
  THArgCheck(sdepth >= 1, 5, "Stride should be a positive integer");
  THArgCheck(srow >= 1, 6, "Stride should be a positive integer");
  THArgCheck(scol >= 1, 7, "Stride should be a positive integer");
  /* NOTE(review): same missing "be" as conv3Dger — left byte-identical */
  THArgCheck(*vf == 'V' || *vf == 'F', 8, "type of convolution can 'V' or 'F'");
  THArgCheck(*xc == 'C' || *xc == 'X', 8, "type of convolution can 'X' or 'C'");

  input = THTensor_(newContiguous)(t_);
  /* only copy the kernel if its last two dims are not already contiguous */
  if (!(k_->stride[4] == 1) || !(k_->stride[3] == k_->size[4])) {
    kernel = THTensor_(newContiguous)(k_);
  } else {
    THTensor_(retain)(k_);
    kernel = k_;
  }

  nInputPlane = input->size[0];
  istride0    = input->stride[0];
  nInputDepth = input->size[1];
  nInputRows  = input->size[2];
  nInputCols  = input->size[3];

  kstride0    = kernel->stride[0];
  kstride1    = kernel->stride[1];
  nKernelDepth = kernel->size[2];
  nKernelRows = kernel->size[3];
  nKernelCols = kernel->size[4];
  nOutputPlane = kernel->size[0];
  THArgCheck(kernel->size[1] == nInputPlane, 2, "invalid number of input planes");

  THArgCheck( (nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv3Dmv : Input image is smaller than kernel");

  nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf);
  nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);
  nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);

  nelem = THTensor_(nElement)(r_);
  THTensor_(resize4d)(r_, nOutputPlane, nOutputDepth, nOutputRows, nOutputCols);

  if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
  {
    THTensor_(zero)(r_);
  }
  else if (beta != 1)
    THTensor_(mul)(r_, r_, beta);

  input_data = THTensor_(data)(input);
  weight_data = THTensor_(data)(kernel);
  output_data = THTensor_(data)(r_);

  for(k = 0; k < nOutputPlane; k++)
  {
    for(i = 0; i < nInputPlane; i++)
    {
      /* get kernel */
      real *ptr_weight = weight_data + k*kstride0 + i*kstride1;
      /* get input */
      real *ptr_input = input_data + i*istride0;

      /* do image, kernel convolution */
      THTensor_(conv3d)(output_data,
                        alpha,
                        ptr_input,  nInputDepth, nInputRows,  nInputCols,
                        ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
                        sdepth, srow, scol, vf, xc);
    }
    /* Next output plane */
    output_data += nOutputDepth*nOutputCols*nOutputRows;
  }
  THTensor_(free)(input);
  THTensor_(free)(kernel);
}

/*
  3D input, 3D kernel, 3D output
  scalar multiplication like: y <- x*y + beta*y
  (definition continues beyond this chunk)
*/
void THTensor_(conv3Dmul)(THTensor *r_, real beta, real alpha,
                          THTensor *t_, THTensor *k_,
                          long sdepth, long srow, long scol,
                          const char *vf, const char *xc)
{
  THTensor *input;
  THTensor* kernel;
  long nInputDepth;
  long nInputRows;
  long nInputCols;
  long nKernelDepth;
  long nKernelRows;
  long nKernelCols;
  long nOutputDepth, nOutputRows, nOutputCols;
  real *ptr_input;
  real *ptr_weight;
  real *output_data;
  ptrdiff_t nelem;

  THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected");
  THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected");
  THArgCheck(sdepth >= 1, 5, "Stride should be a positive integer");
  THArgCheck(srow >= 1, 6, "Stride should be a positive integer");
  THArgCheck(scol >= 1, 7, "Stride should be a positive integer");
  THArgCheck(*vf == 'V' || *vf == 'F', 8, "type of convolution can 'V' or 'F'");
  THArgCheck(*xc == 'C' || *xc == 'X', 8, "type of convolution can 'X' or 'C'");

  input = THTensor_(newContiguous)(t_);
  kernel = THTensor_(newContiguous)(k_);

  nInputDepth = input->size[0];
  nInputRows  = input->size[1];
  nInputCols  = input->size[2];
  nKernelDepth = kernel->size[0];
  nKernelRows = kernel->size[1];
  nKernelCols = kernel->size[2];

  THArgCheck((nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv3Dmul : Input image is smaller than kernel");

  nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf);
  nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);
  nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);

  nelem = THTensor_(nElement)(r_);
  THTensor_(resize3d)(r_, nOutputDepth, nOutputRows, nOutputCols);

  if (nelem == 0 ||
beta == 0 || nelem != THTensor_(nElement)(r_)) THTensor_(zero)(r_); else if (beta != 1) THTensor_(mul)(r_, r_, beta); ptr_input = THTensor_(data)(input); ptr_weight = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); /* do image, kernel convolution */ THTensor_(conv3d)(output_data, alpha, ptr_input, nInputDepth, nInputRows, nInputCols, ptr_weight, nKernelDepth, nKernelRows, nKernelCols, sdepth, srow, scol, vf, xc); THTensor_(free)(input); THTensor_(free)(kernel); } /* 4D input, 4D kernel, 4D output component wise multiplication like y <- y.*x + beta*y */ void THTensor_(conv3Dcmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol, const char *vf, const char *xc) { long nInputPlane, nInputDepth, nInputRows, nInputCols; long nKernelDepth, nKernelRows, nKernelCols; long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols; long istride0, kstride0; THTensor *input; THTensor *kernel; real *input_data; real *weight_data; real *output_data; ptrdiff_t nelem; long k; THArgCheck(t_->nDimension == 4 , 3, "input: 3D Tensor expected"); THArgCheck(k_->nDimension == 4 , 4, "kernel: 3D Tensor expected"); THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can 'V' or 'F'"); THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can 'X' or 'C'"); input = THTensor_(newContiguous)(t_); kernel = THTensor_(newContiguous)(k_); istride0 = input->stride[0]; nInputPlane = input->size[0]; nInputDepth = input->size[1]; nInputRows = input->size[2]; nInputCols = input->size[3]; kstride0 = kernel->stride[0]; nOutputPlane = kernel->size[0]; nKernelDepth = kernel->size[1]; nKernelRows = kernel->size[2]; nKernelCols = kernel->size[3]; THArgCheck(nOutputPlane == nInputPlane, 2, "invalid number of input/kernel planes"); THArgCheck( (nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && 
nInputCols >= nKernelCols) || *vf == 'F', 2, "conv3Dcmul : Input image is smaller than kernel"); nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf); nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); nelem = THTensor_(nElement)(r_); THTensor_(resize4d)(r_, nOutputPlane, nOutputDepth, nOutputRows, nOutputCols); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { THTensor_(zero)(r_); } else if (beta != 1) THTensor_(mul)(r_, r_, beta); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); for(k = 0; k < nOutputPlane; k++) { /* get kernel */ real *ptr_weight = weight_data + k*kstride0; /* get input */ real *ptr_input = input_data + k*istride0; /* do image, kernel convolution */ THTensor_(conv3d)(output_data, alpha, ptr_input, nInputDepth, nInputRows, nInputCols, ptr_weight, nKernelDepth, nKernelRows, nKernelCols, sdepth, srow, scol, vf, xc); /* Next output plane */ output_data += nOutputDepth*nOutputCols*nOutputRows; } THTensor_(free)(input); THTensor_(free)(kernel); } /* 4D input, 4D kernel, 4D output component wise multiplication like with a permutation map y <- y.*x + beta*y */ void THTensor_(conv3Dmap)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, THTensor *map, long sdepth, long srow, long scol, const char *vf, const char *xc) { long nInputPlane, nInputDepth, nInputRows, nInputCols; long nKernelDepth, nKernelRows, nKernelCols; long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols; long istride0, kstride0; THTensor *input; THTensor *kernel; ptrdiff_t nelem; real *input_data; real *weight_data; real *output_data; long nmaps; long k; THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); THArgCheck(map->nDimension == 2 , 4, "map: 2D Tensor expected"); THArgCheck(srow >= 1, 6, "Stride 
should be a positive integer"); THArgCheck(scol >= 1, 7, "Stride should be a positive integer"); THArgCheck(*vf == 'V' || *vf == 'F', 8, "type of convolution can 'V' or 'F'"); THArgCheck(*xc == 'C' || *xc == 'X', 8, "type of convolution can 'X' or 'C'"); input = THTensor_(newContiguous)(t_); kernel = THTensor_(newContiguous)(k_); istride0 = input->stride[0]; nInputPlane = input->size[0]; nInputDepth = input->size[1]; nInputRows = input->size[2]; nInputCols = input->size[3]; kstride0 = kernel->stride[0]; nOutputPlane = kernel->size[0]; nKernelDepth = kernel->size[1]; nKernelRows = kernel->size[2]; nKernelCols = kernel->size[3]; THArgCheck(nOutputPlane == nInputPlane, 2, "invalid number of input/kernel planes"); THArgCheck((nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv3Dmap : Input image is smaller than kernel"); nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf); nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); nelem = THTensor_(nElement)(r_); THTensor_(resize4d)(r_, nOutputPlane, nOutputDepth, nOutputRows, nOutputCols); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { THTensor_(zero)(r_); } else if (beta != 1) THTensor_(mul)(r_, r_, beta); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); nmaps = map->size[0]; for(k = 0; k < nmaps; k++) { /* get indices */ long from = (long)THTensor_(get2d)(map,k,0)-1; long to = (long)THTensor_(get2d)(map,k,1)-1; /* get kernel */ real *ptr_weight = weight_data + k*kstride0; /* get input */ real *ptr_input = input_data + from*istride0; /* get output */ real *ptr_output = output_data + to*nOutputDepth*nOutputRows*nOutputCols; /* do image, kernel convolution */ THTensor_(conv3d)(ptr_output, alpha, ptr_input, nInputDepth, nInputRows, nInputCols, ptr_weight, nKernelDepth, 
nKernelRows, nKernelCols, sdepth, srow, scol, vf, xc); } THTensor_(free)(input); THTensor_(free)(kernel); } #endif lib/TH/generic/THTensorConv.h000066400000000000000000000113461316246254300162730ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THTensorConv.h" #else TH_API void THTensor_(validXCorr2Dptr)(real *r_, real alpha, real *t_, long ir, long ic, real *k_, long kr, long kc, long sr, long sc); TH_API void THTensor_(validConv2Dptr)(real *r_, real alpha, real *t_, long ir, long ic, real *k_, long kr, long kc, long sr, long sc); TH_API void THTensor_(fullXCorr2Dptr)(real *r_, real alpha, real *t_, long ir, long ic, real *k_, long kr, long kc, long sr, long sc); TH_API void THTensor_(fullConv2Dptr)(real *r_, real alpha, real *t_, long ir, long ic, real *k_, long kr, long kc, long sr, long sc); TH_API void THTensor_(validXCorr2DRevptr)(real *r_, real alpha, real *t_, long ir, long ic, real *k_, long kr, long kc, long sr, long sc); TH_API void THTensor_(conv2DRevger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol); TH_API void THTensor_(conv2DRevgerm)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol); TH_API void THTensor_(conv2Dger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc); TH_API void THTensor_(conv2Dmv)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc); TH_API void THTensor_(conv2Dmm)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc); TH_API void THTensor_(conv2Dmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc); TH_API void THTensor_(conv2Dcmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc); 
TH_API void THTensor_(validXCorr3Dptr)(real *r_, real alpha, real *t_, long it, long ir, long ic, real *k_, long kt, long kr, long kc, long st, long sr, long sc); TH_API void THTensor_(validConv3Dptr)(real *r_, real alpha, real *t_, long it, long ir, long ic, real *k_, long kt, long kr, long kc, long st, long sr, long sc); TH_API void THTensor_(fullXCorr3Dptr)(real *r_, real alpha, real *t_, long it, long ir, long ic, real *k_, long kt, long kr, long kc, long st, long sr, long sc); TH_API void THTensor_(fullConv3Dptr)(real *r_, real alpha, real *t_, long it, long ir, long ic, real *k_, long kt, long kr, long kc, long st, long sr, long sc); TH_API void THTensor_(validXCorr3DRevptr)(real *r_, real alpha, real *t_, long it, long ir, long ic, real *k_, long kt, long kr, long kc, long st, long sr, long sc); TH_API void THTensor_(conv3DRevger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol); TH_API void THTensor_(conv3Dger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol, const char *vf, const char *xc); TH_API void THTensor_(conv3Dmv)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol, const char *vf, const char *xc); TH_API void THTensor_(conv3Dmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol, const char *vf, const char *xc); TH_API void THTensor_(conv3Dcmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol, const char *vf, const char *xc); #endif lib/TH/generic/THTensorCopy.c000066400000000000000000000106051316246254300162700ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THTensorCopy.c" #else int THTensor_(copyTransposeValid)(THTensor *tensor, THTensor *src) { const int MIN_SZ = 60 * 60; return THTensor_(isContiguous)(tensor) && THTensor_(nDimension)(src) == 2 && 
THTensor_(stride)(src, 0) == 1 && THTensor_(stride)(src, 1) == THTensor_(size)(src, 0) && THTensor_(nElement)(tensor) >= MIN_SZ; } // special case copy where tensor is contiguous and src is a transposed matrix // This can be generalized to most copies, but it's tricker void THTensor_(copyTranspose)(THTensor *tensor, THTensor *src) { #define MIN(x, y) (((x) < (y)) ? (x) : (y)) #define MAX(x, y) (((x) > (y)) ? (x) : (y)) #ifdef TH_REAL_IS_BYTE const int BLOCK_SZ = 120; #else const int BLOCK_SZ = 60; #endif THTensor *buf = THTensor_(newWithSize2d)(BLOCK_SZ, BLOCK_SZ); real *sp = THTensor_(data)(src); real *rp = THTensor_(data)(tensor); real *bp = THTensor_(data)(buf); long NR = THTensor_(size)(src, 0); long NC = THTensor_(size)(src, 1); for (long R = 0; R < NR; R += BLOCK_SZ) { for (long C = 0; C < NC; C += BLOCK_SZ) { real *spo = sp + R + C * NR; real *rpo = rp + C + R * NC; int nr = MIN(NR - R, BLOCK_SZ); int nc = MIN(NC - C, BLOCK_SZ); // 1. copy columns from src to buf for (int c = 0; c < nc; c++) { memcpy(bp + c * BLOCK_SZ, spo + c * NR, nr * sizeof(real)); } // 2. transpose buf in place int rc_max = MAX(nr, nc); int rc_min = MIN(nr, nc); for (int r = 0; r < rc_max; r++) { int end = MIN(r, rc_min); for (int c = 0; c < end; c++) { real tmp = bp[r + BLOCK_SZ * c]; bp[r + BLOCK_SZ * c] = bp[r * BLOCK_SZ + c]; bp[r * BLOCK_SZ + c] = tmp; } } // 3. 
copy rows from buf to dst for (int r = 0; r < nr; r++) { memcpy(rpo + r * NC, bp + r * BLOCK_SZ, nc * sizeof(real)); } } } THTensor_(free)(buf); #undef MIN #undef MAX } void THTensor_(copy)(THTensor *tensor, THTensor *src) { if (tensor == src) return; if (THTensor_(isContiguous)(tensor) && THTensor_(isContiguous)(src) && THTensor_(nElement)(tensor) == THTensor_(nElement)(src)) { real *sp = THTensor_(data)(src); real *rp = THTensor_(data)(tensor); ptrdiff_t sz = THTensor_(nElement)(tensor); #ifndef TH_REAL_IS_HALF THVector_(copy)(rp, sp, sz); #else memcpy(rp, sp, sz * sizeof(real)); #endif #ifndef TH_REAL_IS_HALF } else if (THTensor_(copyTransposeValid)(tensor, src)) { THTensor_(copyTranspose)(tensor, src); #endif } else { TH_TENSOR_APPLY2(real, tensor, real, src, *tensor_data = *src_data;) } } #define IMPLEMENT_THTensor_COPY(TYPENAMESRC, TYPE_SRC) \ void THTensor_(copy##TYPENAMESRC)(THTensor *tensor, TH##TYPENAMESRC##Tensor *src) \ { \ TH_TENSOR_APPLY2(real, tensor, TYPE_SRC, src, *tensor_data = (real)(*src_data);) \ } #define IMPLEMENT_THTensor_COPY_TO_HALF(TYPENAMESRC, TYPE_SRC) \ void THTensor_(copy##TYPENAMESRC)(THTensor *tensor, TH##TYPENAMESRC##Tensor *src) \ { \ TH_TENSOR_APPLY2(real, tensor, TYPE_SRC, src, *tensor_data = TH_float2half((float)*src_data);) \ } #define IMPLEMENT_THTensor_COPY_FROM_HALF(TYPENAMESRC, TYPE_SRC) \ void THTensor_(copy##TYPENAMESRC)(THTensor *tensor, TH##TYPENAMESRC##Tensor *src) \ { \ TH_TENSOR_APPLY2(real, tensor, TYPE_SRC, src, *tensor_data = (real)TH_half2float(*src_data);) \ } #define IMPLEMENT_THTensor_COPY_TO_FROM_HALF(TYPENAMESRC, TYPE_SRC) \ void THTensor_(copy##TYPENAMESRC)(THTensor *tensor, TH##TYPENAMESRC##Tensor *src) \ { \ TH_TENSOR_APPLY2(real, tensor, TYPE_SRC, src, *tensor_data = *src_data;) \ } #ifndef TH_REAL_IS_HALF IMPLEMENT_THTensor_COPY(Byte, unsigned char) IMPLEMENT_THTensor_COPY(Char, char) IMPLEMENT_THTensor_COPY(Short, short) IMPLEMENT_THTensor_COPY(Int, int) IMPLEMENT_THTensor_COPY(Long, long) 
IMPLEMENT_THTensor_COPY(Float, float) IMPLEMENT_THTensor_COPY(Double, double) IMPLEMENT_THTensor_COPY_FROM_HALF(Half, THHalf) #else /* only allow pass-through for Half */ IMPLEMENT_THTensor_COPY_TO_FROM_HALF(Half, THHalf) IMPLEMENT_THTensor_COPY_TO_HALF(Byte, unsigned char) IMPLEMENT_THTensor_COPY_TO_HALF(Char, char) IMPLEMENT_THTensor_COPY_TO_HALF(Short, short) IMPLEMENT_THTensor_COPY_TO_HALF(Int, int) IMPLEMENT_THTensor_COPY_TO_HALF(Long, long) IMPLEMENT_THTensor_COPY_TO_HALF(Float, float) IMPLEMENT_THTensor_COPY_TO_HALF(Double, double) #endif /* REAL_IS_HALF */ #endif lib/TH/generic/THTensorCopy.h000066400000000000000000000014731316246254300163000ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THTensorCopy.h" #else /* Support for copy between different Tensor types */ TH_API void THTensor_(copy)(THTensor *tensor, THTensor *src); TH_API void THTensor_(copyByte)(THTensor *tensor, struct THByteTensor *src); TH_API void THTensor_(copyChar)(THTensor *tensor, struct THCharTensor *src); TH_API void THTensor_(copyShort)(THTensor *tensor, struct THShortTensor *src); TH_API void THTensor_(copyInt)(THTensor *tensor, struct THIntTensor *src); TH_API void THTensor_(copyLong)(THTensor *tensor, struct THLongTensor *src); TH_API void THTensor_(copyFloat)(THTensor *tensor, struct THFloatTensor *src); TH_API void THTensor_(copyDouble)(THTensor *tensor, struct THDoubleTensor *src); TH_API void THTensor_(copyHalf)(THTensor *tensor, struct THHalfTensor *src); #endif lib/TH/generic/THTensorLapack.c000066400000000000000000001045461316246254300165610ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THTensorLapack.c" #else /* Check if self is transpose of a contiguous matrix */ static int THTensor_(isTransposedContiguous)(THTensor *self) { return self->stride[0] == 1 && self->stride[1] == self->size[0]; } /* If a matrix is a regular contiguous matrix, make sure it is transposed because this is what we return from 
Lapack calls. */ static void THTensor_(checkTransposed)(THTensor *self) { if(THTensor_(isContiguous)(self)) THTensor_(transpose)(self, NULL, 0, 1); return; } /* newContiguous followed by transpose Similar to (newContiguous), but checks if the transpose of the matrix is contiguous and also limited to 2D matrices. */ static THTensor *THTensor_(newTransposedContiguous)(THTensor *self) { THTensor *tensor; if(THTensor_(isTransposedContiguous)(self)) { THTensor_(retain)(self); tensor = self; } else { tensor = THTensor_(newContiguous)(self); THTensor_(transpose)(tensor, NULL, 0, 1); } return tensor; } /* Given the result tensor and src tensor, decide if the lapack call should use the provided result tensor or should allocate a new space to put the result in. The returned tensor have to be freed by the calling function. nrows is required, because some lapack calls, require output space smaller than input space, like underdetermined gels. */ static THTensor *THTensor_(checkLapackClone)(THTensor *result, THTensor *src, int nrows) { /* check if user wants to reuse src and if it is correct shape/size */ if (src == result && THTensor_(isTransposedContiguous)(src) && src->size[1] == nrows) THTensor_(retain)(result); else if(src == result || result == NULL) /* in this case, user wants reuse of src, but its structure is not OK */ result = THTensor_(new)(); else THTensor_(retain)(result); return result; } /* Same as cloneColumnMajor, but accepts nrows argument, because some lapack calls require the resulting tensor to be larger than src. 
*/ static THTensor *THTensor_(cloneColumnMajorNrows)(THTensor *self, THTensor *src, int nrows) { THTensor *result; THTensor *view; if (src == NULL) src = self; result = THTensor_(checkLapackClone)(self, src, nrows); if (src == result) return result; THTensor_(resize2d)(result, src->size[1], nrows); THTensor_(checkTransposed)(result); if (src->size[0] == nrows) THTensor_(copy)(result, src); else { view = THTensor_(newNarrow)(result, 0, 0, src->size[0]); THTensor_(copy)(view, src); THTensor_(free)(view); } return result; } /* Create a clone of src in self column major order for use with Lapack. If src == self, a new tensor is allocated, in any case, the return tensor should be freed by calling function. */ static THTensor *THTensor_(cloneColumnMajor)(THTensor *self, THTensor *src) { return THTensor_(cloneColumnMajorNrows)(self, src, src->size[0]); } void THTensor_(gesv)(THTensor *rb_, THTensor *ra_, THTensor *b, THTensor *a) { int free_b = 0; if (a == NULL) a = ra_; if (b == NULL) b = rb_; THArgCheck(a->nDimension == 2, 2, "A should have 2 dimensions, but has %d", a->nDimension); THArgCheck(b->nDimension == 1 || b->nDimension == 2, 1, "B should have 1 or 2 " "dimensions, but has %d", b->nDimension); THArgCheck(a->size[0] == a->size[1], 2, "A should be square, but is %ldx%ld", a->size[0], a->size[1]); THArgCheck(a->size[0] == b->size[0], 2, "A,B size incompatible - A has %ld " "rows, B has %ld", a->size[0], b->size[0]); if (b->nDimension == 1) { b = THTensor_(newWithStorage2d)(b->storage, b->storageOffset, b->size[0], b->stride[0], 1, 0); free_b = 1; } int n, nrhs, lda, ldb, info; THIntTensor *ipiv; THTensor *ra__; // working version of A matrix to be passed into lapack GELS THTensor *rb__; // working version of B matrix to be passed into lapack GELS ra__ = THTensor_(cloneColumnMajor)(ra_, a); rb__ = THTensor_(cloneColumnMajor)(rb_, b); n = (int)ra__->size[0]; nrhs = (int)rb__->size[1]; lda = n; ldb = n; ipiv = THIntTensor_newWithSize1d((long)n); THLapack_(gesv)(n, 
nrhs, THTensor_(data)(ra__), lda, THIntTensor_data(ipiv), THTensor_(data)(rb__), ldb, &info); THLapackCheckWithCleanup("Lapack Error in %s : U(%d,%d) is zero, singular U.", THCleanup( THTensor_(free)(ra__); THTensor_(free)(rb__); THIntTensor_free(ipiv); if (free_b) THTensor_(free)(b);), "gesv", info, info); THTensor_(freeCopyTo)(ra__, ra_); THTensor_(freeCopyTo)(rb__, rb_); THIntTensor_free(ipiv); if (free_b) THTensor_(free)(b); } void THTensor_(trtrs)(THTensor *rb_, THTensor *ra_, THTensor *b, THTensor *a, const char *uplo, const char *trans, const char *diag) { int free_b = 0; if (a == NULL) a = ra_; if (b == NULL) b = rb_; THArgCheck(a->nDimension == 2, 2, "A should have 2 dimensions, but has %d", a->nDimension); THArgCheck(b->nDimension == 1 || b->nDimension == 2, 1, "B should have 1 or 2 " "dimensions, but has %d", b->nDimension); THArgCheck(a->size[0] == a->size[1], 2, "A should be square, but is %ldx%ld", a->size[0], a->size[1]); THArgCheck(a->size[0] == b->size[0], 2, "A,B size incompatible - A has %ld " "rows, B has %ld", a->size[0], b->size[0]); if (b->nDimension == 1) { b = THTensor_(newWithStorage2d)(b->storage, b->storageOffset, b->size[0], b->stride[0], 1, 0); free_b = 1; } int n, nrhs, lda, ldb, info; THTensor *ra__; // working version of A matrix to be passed into lapack TRTRS THTensor *rb__; // working version of B matrix to be passed into lapack TRTRS ra__ = THTensor_(cloneColumnMajor)(ra_, a); rb__ = THTensor_(cloneColumnMajor)(rb_, b); n = (int)ra__->size[0]; nrhs = (int)rb__->size[1]; lda = n; ldb = n; THLapack_(trtrs)(uplo[0], trans[0], diag[0], n, nrhs, THTensor_(data)(ra__), lda, THTensor_(data)(rb__), ldb, &info); THLapackCheckWithCleanup("Lapack Error in %s : A(%d,%d) is zero, singular A", THCleanup( THTensor_(free)(ra__); THTensor_(free)(rb__); if (free_b) THTensor_(free)(b);), "trtrs", info, info); THTensor_(freeCopyTo)(ra__, ra_); THTensor_(freeCopyTo)(rb__, rb_); if (free_b) THTensor_(free)(b); } void THTensor_(gels)(THTensor *rb_, 
THTensor *ra_, THTensor *b, THTensor *a) { int free_b = 0; // Note that a = NULL is interpreted as a = ra_, and b = NULL as b = rb_. if (a == NULL) a = ra_; if (b == NULL) b = rb_; THArgCheck(a->nDimension == 2, 2, "A should have 2 dimensions, but has %d", a->nDimension); THArgCheck(b->nDimension == 1 || b->nDimension == 2, 1, "B should have 1 or 2 " "dimensions, but has %d", b->nDimension); THArgCheck(a->size[0] == b->size[0], 2, "A,B size incompatible - A has %ld " "rows, B has %ld", a->size[0], b->size[0]); if (b->nDimension == 1) { b = THTensor_(newWithStorage2d)(b->storage, b->storageOffset, b->size[0], b->stride[0], 1, 0); free_b = 1; } int m, n, nrhs, lda, ldb, info, lwork; THTensor *work = NULL; real wkopt = 0; THTensor *ra__ = NULL; // working version of A matrix to be passed into lapack GELS THTensor *rb__ = NULL; // working version of B matrix to be passed into lapack GELS ra__ = THTensor_(cloneColumnMajor)(ra_, a); m = ra__->size[0]; n = ra__->size[1]; lda = m; ldb = (m > n) ? 
m : n; rb__ = THTensor_(cloneColumnMajorNrows)(rb_, b, ldb); nrhs = rb__->size[1]; info = 0; /* get optimal workspace size */ THLapack_(gels)('N', m, n, nrhs, THTensor_(data)(ra__), lda, THTensor_(data)(rb__), ldb, &wkopt, -1, &info); lwork = (int)wkopt; work = THTensor_(newWithSize1d)(lwork); THLapack_(gels)('N', m, n, nrhs, THTensor_(data)(ra__), lda, THTensor_(data)(rb__), ldb, THTensor_(data)(work), lwork, &info); THLapackCheckWithCleanup("Lapack Error in %s : The %d-th diagonal element of the triangular factor of A is zero", THCleanup(THTensor_(free)(ra__); THTensor_(free)(rb__); THTensor_(free)(work); if (free_b) THTensor_(free)(b);), "gels", info,""); /* rb__ is currently ldb by nrhs; resize it to n by nrhs */ rb__->size[0] = n; if (rb__ != rb_) THTensor_(resize2d)(rb_, n, nrhs); THTensor_(freeCopyTo)(ra__, ra_); THTensor_(freeCopyTo)(rb__, rb_); THTensor_(free)(work); if (free_b) THTensor_(free)(b); } void THTensor_(geev)(THTensor *re_, THTensor *rv_, THTensor *a_, const char *jobvr) { int n, lda, lwork, info, ldvr; THTensor *work, *wi, *wr, *a; real wkopt; real *rv_data; long i; THTensor *re__ = NULL; THTensor *rv__ = NULL; THArgCheck(a_->nDimension == 2, 1, "A should be 2 dimensional"); THArgCheck(a_->size[0] == a_->size[1], 1,"A should be square"); /* we want to definitely clone a_ for geev*/ a = THTensor_(cloneColumnMajor)(NULL, a_); n = a->size[0]; lda = n; wi = THTensor_(newWithSize1d)(n); wr = THTensor_(newWithSize1d)(n); rv_data = NULL; ldvr = 1; if (*jobvr == 'V') { THTensor_(resize2d)(rv_,n,n); /* guard against someone passing a correct size, but wrong stride */ rv__ = THTensor_(newTransposedContiguous)(rv_); rv_data = THTensor_(data)(rv__); ldvr = n; } THTensor_(resize2d)(re_,n,2); re__ = THTensor_(newContiguous)(re_); /* get optimal workspace size */ THLapack_(geev)('N', jobvr[0], n, THTensor_(data)(a), lda, THTensor_(data)(wr), THTensor_(data)(wi), NULL, 1, rv_data, ldvr, &wkopt, -1, &info); lwork = (int)wkopt; work = 
THTensor_(newWithSize1d)(lwork); THLapack_(geev)('N', jobvr[0], n, THTensor_(data)(a), lda, THTensor_(data)(wr), THTensor_(data)(wi), NULL, 1, rv_data, ldvr, THTensor_(data)(work), lwork, &info); THLapackCheckWithCleanup(" Lapack Error in %s : %d off-diagonal elements of an didn't converge to zero", THCleanup(THTensor_(free)(re__); THTensor_(free)(rv__); THTensor_(free)(a); THTensor_(free)(wi); THTensor_(free)(wr); THTensor_(free)(work);), "geev", info,""); { real *re_data = THTensor_(data)(re__); real *wi_data = THTensor_(data)(wi); real *wr_data = THTensor_(data)(wr); for (i=0; inDimension == 2, 1, "A should be 2 dimensional"); THArgCheck(a->size[0] == a->size[1], 1,"A should be square"); int n, lda, lwork, info; THTensor *work; real wkopt; THTensor *rv__ = NULL; THTensor *re__ = NULL; rv__ = THTensor_(cloneColumnMajor)(rv_, a); n = rv__->size[0]; lda = n; THTensor_(resize1d)(re_,n); re__ = THTensor_(newContiguous)(re_); /* get optimal workspace size */ THLapack_(syev)(jobz[0], uplo[0], n, THTensor_(data)(rv__), lda, THTensor_(data)(re_), &wkopt, -1, &info); lwork = (int)wkopt; work = THTensor_(newWithSize1d)(lwork); THLapack_(syev)(jobz[0], uplo[0], n, THTensor_(data)(rv__), lda, THTensor_(data)(re_), THTensor_(data)(work), lwork, &info); THLapackCheckWithCleanup("Lapack Error %s : %d off-diagonal elements didn't converge to zero", THCleanup(THTensor_(free)(rv__); THTensor_(free)(re__); THTensor_(free)(work);), "syev", info,""); THTensor_(freeCopyTo)(rv__, rv_); THTensor_(freeCopyTo)(re__, re_); THTensor_(free)(work); } void THTensor_(gesvd)(THTensor *ru_, THTensor *rs_, THTensor *rv_, THTensor *a, const char* jobu) { THTensor *ra_ = THTensor_(new)(); THTensor_(gesvd2)(ru_, rs_, rv_, ra_, a, jobu); THTensor_(free)(ra_); } void THTensor_(gesvd2)(THTensor *ru_, THTensor *rs_, THTensor *rv_, THTensor *ra_, THTensor *a, const char* jobu) { if (a == NULL) a = ra_; THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional"); int k,m, n, lda, ldu, ldvt, lwork, info; 
THTensor *work; THTensor *rvf_ = THTensor_(new)(); real wkopt; THTensor *ra__ = NULL; THTensor *ru__ = NULL; THTensor *rs__ = NULL; THTensor *rv__ = NULL; ra__ = THTensor_(cloneColumnMajor)(ra_, a); m = ra__->size[0]; n = ra__->size[1]; k = (m < n ? m : n); lda = m; ldu = m; ldvt = n; THTensor_(resize1d)(rs_,k); THTensor_(resize2d)(rvf_,ldvt,n); if (*jobu == 'A') THTensor_(resize2d)(ru_,m,ldu); else THTensor_(resize2d)(ru_,k,ldu); THTensor_(checkTransposed)(ru_); /* guard against someone passing a correct size, but wrong stride */ ru__ = THTensor_(newTransposedContiguous)(ru_); rs__ = THTensor_(newContiguous)(rs_); rv__ = THTensor_(newContiguous)(rvf_); THLapack_(gesvd)(jobu[0],jobu[0], m,n,THTensor_(data)(ra__),lda, THTensor_(data)(rs__), THTensor_(data)(ru__), ldu, THTensor_(data)(rv__), ldvt, &wkopt, -1, &info); lwork = (int)wkopt; work = THTensor_(newWithSize1d)(lwork); THLapack_(gesvd)(jobu[0],jobu[0], m,n,THTensor_(data)(ra__),lda, THTensor_(data)(rs__), THTensor_(data)(ru__), ldu, THTensor_(data)(rv__), ldvt, THTensor_(data)(work),lwork, &info); THLapackCheckWithCleanup(" Lapack Error %s : %d superdiagonals failed to converge.", THCleanup( THTensor_(free)(ru__); THTensor_(free)(rs__); THTensor_(free)(rv__); THTensor_(free)(ra__); THTensor_(free)(work);), "gesvd", info,""); if (*jobu == 'S') THTensor_(narrow)(rv__,NULL,1,0,k); THTensor_(freeCopyTo)(ru__, ru_); THTensor_(freeCopyTo)(rs__, rs_); THTensor_(freeCopyTo)(rv__, rvf_); THTensor_(freeCopyTo)(ra__, ra_); THTensor_(free)(work); if (*jobu == 'S') { THTensor_(narrow)(rvf_,NULL,1,0,k); } THTensor_(resizeAs)(rv_, rvf_); THTensor_(copy)(rv_, rvf_); THTensor_(free)(rvf_); } void THTensor_(getri)(THTensor *ra_, THTensor *a) { if (a == NULL) a = ra_; THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional"); THArgCheck(a->size[0] == a->size[1], 1, "A should be square"); int m, n, lda, info, lwork; real wkopt; THIntTensor *ipiv; THTensor *work; THTensor *ra__ = NULL; ra__ = THTensor_(cloneColumnMajor)(ra_, 
a); m = ra__->size[0]; n = ra__->size[1]; lda = m; ipiv = THIntTensor_newWithSize1d((long)m); /* Run LU */ THLapack_(getrf)(n, n, THTensor_(data)(ra__), lda, THIntTensor_data(ipiv), &info); THLapackCheckWithCleanup("Lapack Error %s : U(%d,%d) is 0, U is singular", THCleanup( THTensor_(free)(ra__); THIntTensor_free(ipiv);), "getrf", info, info); /* Run inverse */ THLapack_(getri)(n, THTensor_(data)(ra__), lda, THIntTensor_data(ipiv), &wkopt, -1, &info); lwork = (int)wkopt; work = THTensor_(newWithSize1d)(lwork); THLapack_(getri)(n, THTensor_(data)(ra__), lda, THIntTensor_data(ipiv), THTensor_(data)(work), lwork, &info); THLapackCheckWithCleanup("Lapack Error %s : U(%d,%d) is 0, U is singular", THCleanup( THTensor_(free)(ra__); THTensor_(free)(work); THIntTensor_free(ipiv);), "getri", info, info); THTensor_(freeCopyTo)(ra__, ra_); THTensor_(free)(work); THIntTensor_free(ipiv); } void THTensor_(clearUpLoTriangle)(THTensor *a, const char *uplo) { THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional"); THArgCheck(a->size[0] == a->size[1], 1, "A should be square"); int n = a->size[0]; /* Build full matrix */ real *p = THTensor_(data)(a); long i, j; /* Upper Triangular Case */ if (uplo[0] == 'U') { /* Clear lower triangle (excluding diagonals) */ for (i=0; inDimension == 2, 1, "A should be 2 dimensional"); THArgCheck(a->size[0] == a->size[1], 1, "A should be square"); int n = a->size[0]; /* Build full matrix */ real *p = THTensor_(data)(a); long i, j; /* Upper Triangular Case */ if (uplo[0] == 'U') { /* Clear lower triangle (excluding diagonals) */ for (i=0; inDimension == 2, 1, "A should be 2 dimensional"); THArgCheck(a->size[0] == a->size[1], 1, "A should be square"); int n, lda, info; THTensor *ra__ = NULL; ra__ = THTensor_(cloneColumnMajor)(ra_, a); n = ra__->size[0]; lda = n; /* Run Factorization */ THLapack_(potrf)(uplo[0], n, THTensor_(data)(ra__), lda, &info); THLapackCheckWithCleanup("Lapack Error in %s : the leading minor of order %d is not positive 
definite", THCleanup(THTensor_(free)(ra__);), "potrf", info, ""); THTensor_(clearUpLoTriangle)(ra__, uplo); THTensor_(freeCopyTo)(ra__, ra_); } void THTensor_(potrs)(THTensor *rb_, THTensor *b, THTensor *a, const char *uplo) { int free_b = 0; if (b == NULL) b = rb_; THArgCheck(a->nDimension == 2, 2, "A should have 2 dimensions, but has %d", a->nDimension); THArgCheck(b->nDimension == 1 || b->nDimension == 2, 1, "B should have 1 or 2 " "dimensions, but has %d", b->nDimension); THArgCheck(a->size[0] == a->size[1], 2, "A should be square, but is %ldx%ld", a->size[0], a->size[1]); THArgCheck(a->size[0] == b->size[0], 2, "A,B size incompatible - A has %ld " "rows, B has %ld", a->size[0], b->size[0]); if (b->nDimension == 1) { b = THTensor_(newWithStorage2d)(b->storage, b->storageOffset, b->size[0], b->stride[0], 1, 0); free_b = 1; } int n, nrhs, lda, ldb, info; THTensor *ra__; // working version of A matrix to be passed into lapack TRTRS THTensor *rb__; // working version of B matrix to be passed into lapack TRTRS ra__ = THTensor_(cloneColumnMajor)(NULL, a); rb__ = THTensor_(cloneColumnMajor)(rb_, b); n = (int)ra__->size[0]; nrhs = (int)rb__->size[1]; lda = n; ldb = n; THLapack_(potrs)(uplo[0], n, nrhs, THTensor_(data)(ra__), lda, THTensor_(data)(rb__), ldb, &info); THLapackCheckWithCleanup("Lapack Error in %s : A(%d,%d) is zero, singular A", THCleanup( THTensor_(free)(ra__); THTensor_(free)(rb__); if (free_b) THTensor_(free)(b);), "potrs", info, info); if (free_b) THTensor_(free)(b); THTensor_(free)(ra__); THTensor_(freeCopyTo)(rb__, rb_); } void THTensor_(potri)(THTensor *ra_, THTensor *a, const char *uplo) { if (a == NULL) a = ra_; THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional"); THArgCheck(a->size[0] == a->size[1], 1, "A should be square"); int n, lda, info; THTensor *ra__ = NULL; ra__ = THTensor_(cloneColumnMajor)(ra_, a); n = ra__->size[0]; lda = n; /* Run inverse */ THLapack_(potri)(uplo[0], n, THTensor_(data)(ra__), lda, &info); 
THLapackCheckWithCleanup("Lapack Error %s : A(%d,%d) is 0, A cannot be factorized", THCleanup(THTensor_(free)(ra__);), "potri", info, info); THTensor_(copyUpLoTriangle)(ra__, uplo); THTensor_(freeCopyTo)(ra__, ra_); } /* Computes the Cholesky factorization with complete pivoting of a real symmetric positive semidefinite matrix. Args: * `ra_` - result Tensor in which to store the factor U or L from the Cholesky factorization. * `rpiv_` - result IntTensor containing sparse permutation matrix P, encoded as P[rpiv_[k], k] = 1. * `a` - input Tensor; the input matrix to factorize. * `uplo` - string; specifies whether the upper or lower triangular part of the symmetric matrix A is stored. "U"/"L" for upper/lower triangular. * `tol` - double; user defined tolerance, or < 0 for automatic choice. The algorithm terminates when the pivot <= tol. */ void THTensor_(pstrf)(THTensor *ra_, THIntTensor *rpiv_, THTensor *a, const char *uplo, real tol) { THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional"); THArgCheck(a->size[0] == a->size[1], 1, "A should be square"); int n = a->size[0]; THTensor *ra__ = THTensor_(cloneColumnMajor)(ra_, a); THIntTensor_resize1d(rpiv_, n); // Allocate working tensor THTensor *work = THTensor_(newWithSize1d)(2 * n); // Run Cholesky factorization int lda = n; int rank, info; THLapack_(pstrf)(uplo[0], n, THTensor_(data)(ra__), lda, THIntTensor_data(rpiv_), &rank, tol, THTensor_(data)(work), &info); THLapackCheckWithCleanup("Lapack Error %s : matrix is rank deficient or not positive semidefinite", THCleanup( THTensor_(free)(ra__); THTensor_(free)(work);), "pstrf", info,""); THTensor_(clearUpLoTriangle)(ra__, uplo); THTensor_(freeCopyTo)(ra__, ra_); THTensor_(free)(work); } /* Perform a QR decomposition of a matrix. In LAPACK, two parts of the QR decomposition are implemented as two separate functions: geqrf and orgqr. 
For flexibility and efficiency, these are wrapped directly, below - but to make the common usage convenient, we also provide this function, which calls them both and returns the results in a more intuitive form. Args: * `rq_` - result Tensor in which to store the Q part of the decomposition. * `rr_` - result Tensor in which to store the R part of the decomposition. * `a` - input Tensor; the matrix to decompose. */ void THTensor_(qr)(THTensor *rq_, THTensor *rr_, THTensor *a) { int m = a->size[0]; int n = a->size[1]; int k = (m < n ? m : n); THTensor *ra_ = THTensor_(new)(); THTensor *rtau_ = THTensor_(new)(); THTensor *rr__ = THTensor_(new)(); THTensor_(geqrf)(ra_, rtau_, a); THTensor_(resize2d)(rr__, k, ra_->size[1]); THTensor_(narrow)(rr__, ra_, 0, 0, k); THTensor_(triu)(rr_, rr__, 0); THTensor_(resize2d)(rq_, ra_->size[0], k); THTensor_(orgqr)(rq_, ra_, rtau_); THTensor_(narrow)(rq_, rq_, 1, 0, k); THTensor_(free)(ra_); THTensor_(free)(rtau_); THTensor_(free)(rr__); } /* The geqrf function does the main work of QR-decomposing a matrix. However, rather than producing a Q matrix directly, it produces a sequence of elementary reflectors which may later be composed to construct Q - for example with the orgqr function, below. Args: * `ra_` - Result matrix which will contain: i) The elements of R, on and above the diagonal. ii) Directions of the reflectors implicitly defining Q. * `rtau_` - Result tensor which will contain the magnitudes of the reflectors implicitly defining Q. * `a` - Input matrix, to decompose. If NULL, `ra_` is used as input. For further details, please see the LAPACK documentation. */ void THTensor_(geqrf)(THTensor *ra_, THTensor *rtau_, THTensor *a) { if (a == NULL) ra_ = a; THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional"); THTensor *ra__ = NULL; /* Prepare the input for LAPACK, making a copy if necessary. */ ra__ = THTensor_(cloneColumnMajor)(ra_, a); int m = ra__->size[0]; int n = ra__->size[1]; int k = (m < n ? 
m : n); int lda = m; THTensor_(resize1d)(rtau_, k); /* Dry-run to query the suggested size of the workspace. */ int info = 0; real wkopt = 0; THLapack_(geqrf)(m, n, THTensor_(data)(ra__), lda, THTensor_(data)(rtau_), &wkopt, -1, &info); /* Allocate the workspace and call LAPACK to do the real work. */ int lwork = (int)wkopt; THTensor *work = THTensor_(newWithSize1d)(lwork); THLapack_(geqrf)(m, n, THTensor_(data)(ra__), lda, THTensor_(data)(rtau_), THTensor_(data)(work), lwork, &info); THLapackCheckWithCleanup("Lapack Error %s : unknown Lapack error. info = %i", THCleanup( THTensor_(free)(ra__); THTensor_(free)(work);), "geqrf", info,""); THTensor_(freeCopyTo)(ra__, ra_); THTensor_(free)(work); } /* The orgqr function allows reconstruction of a matrix Q with orthogonal columns, from a sequence of elementary reflectors, such as is produced by the geqrf function. Args: * `ra_` - result Tensor, which will contain the matrix Q. * `a` - input Tensor, which should be a matrix with the directions of the elementary reflectors below the diagonal. If NULL, `ra_` is used as input. * `tau` - input Tensor, containing the magnitudes of the elementary reflectors. For further details, please see the LAPACK documentation. */ void THTensor_(orgqr)(THTensor *ra_, THTensor *a, THTensor *tau) { if (a == NULL) a = ra_; THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional"); THTensor *ra__ = NULL; ra__ = THTensor_(cloneColumnMajor)(ra_, a); int m = ra__->size[0]; int n = ra__->size[1]; int k = tau->size[0]; int lda = m; /* Dry-run to query the suggested size of the workspace. */ int info = 0; real wkopt = 0; THLapack_(orgqr)(m, k, k, THTensor_(data)(ra__), lda, THTensor_(data)(tau), &wkopt, -1, &info); /* Allocate the workspace and call LAPACK to do the real work. 
*/ int lwork = (int)wkopt; THTensor *work = THTensor_(newWithSize1d)(lwork); THLapack_(orgqr)(m, k, k, THTensor_(data)(ra__), lda, THTensor_(data)(tau), THTensor_(data)(work), lwork, &info); THLapackCheckWithCleanup(" Lapack Error %s : unknown Lapack error. info = %i", THCleanup( THTensor_(free)(ra__); THTensor_(free)(work);), "orgqr", info,""); THTensor_(freeCopyTo)(ra__, ra_); THTensor_(free)(work); } /* The ormqr function multiplies Q with another matrix from a sequence of elementary reflectors, such as is produced by the geqrf function. Args: * `ra_` - result Tensor, which will contain the matrix Q' c. * `a` - input Tensor, which should be a matrix with the directions of the elementary reflectors below the diagonal. If NULL, `ra_` is used as input. * `tau` - input Tensor, containing the magnitudes of the elementary reflectors. * `c` - input Tensor, containing the matrix to be multiplied. * `side` - char, determining whether c is left- or right-multiplied with Q. * `trans` - char, determining whether to transpose Q before multiplying. For further details, please see the LAPACK documentation. */ void THTensor_(ormqr)(THTensor *ra_, THTensor *a, THTensor *tau, THTensor *c, const char *side, const char *trans) { if (a == NULL) a = ra_; THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional"); THTensor *ra__ = NULL; ra__ = THTensor_(cloneColumnMajor)(ra_, c); int m = c->size[0]; int n = c->size[1]; int k = tau->size[0]; int lda; if (*side == 'L') { lda = m; } else { lda = n; } int ldc = m; /* Dry-run to query the suggested size of the workspace. */ int info = 0; real wkopt = 0; THLapack_(ormqr)(side[0], trans[0], m, n, k, THTensor_(data)(a), lda, THTensor_(data)(tau), THTensor_(data)(ra__), ldc, &wkopt, -1, &info); /* Allocate the workspace and call LAPACK to do the real work. 
*/ int lwork = (int)wkopt; THTensor *work = THTensor_(newWithSize1d)(lwork); THLapack_(ormqr)(side[0], trans[0], m, n, k, THTensor_(data)(a), lda, THTensor_(data)(tau), THTensor_(data)(ra__), ldc, THTensor_(data)(work), lwork, &info); THLapackCheckWithCleanup(" Lapack Error %s : unknown Lapack error. info = %i", THCleanup( THTensor_(free)(ra__); THTensor_(free)(work);), "ormqr", info,""); THTensor_(freeCopyTo)(ra__, ra_); THTensor_(free)(work); } void THTensor_(btrifact)(THTensor *ra_, THIntTensor *rpivots_, THIntTensor *rinfo_, int pivot, THTensor *a) { THArgCheck(THTensor_(nDimension)(a) == 3, 1, "expected 3D tensor, got %dD", THTensor_(nDimension)(a)); if (!pivot) { THError("btrifact without pivoting is not implemented on the CPU"); } if (ra_ != a) { THTensor_(resizeAs)(ra_, a); THTensor_(copy)(ra_, a); } int m = a->size[1]; int n = a->size[2]; if (m != n) { THError("btrifact is only implemented for square matrices"); } long num_batches = THTensor_(size)(a, 0); THTensor *ra__; int lda; if (ra_->stride[1] == 1) { // column ordered, what BLAS wants lda = ra_->stride[2]; ra__ = ra_; } else { // not column ordered, need to make it such (requires copy) THTensor *transp_r_ = THTensor_(newTranspose)(ra_, 1, 2); ra__ = THTensor_(newClone)(transp_r_); THTensor_(free)(transp_r_); THTensor_(transpose)(ra__, NULL, 1, 2); lda = ra__->stride[2]; } THTensor *ai = THTensor_(new)(); THTensor *rai = THTensor_(new)(); THIntTensor *rpivoti = THIntTensor_new(); int info = 0; int *info_ptr = &info; if (rinfo_) { THIntTensor_resize1d(rinfo_, num_batches); info_ptr = THIntTensor_data(rinfo_); } THIntTensor_resize2d(rpivots_, num_batches, n); long batch = 0; for (; batch < num_batches; ++batch) { THTensor_(select)(ai, a, 0, batch); THTensor_(select)(rai, ra__, 0, batch); THIntTensor_select(rpivoti, rpivots_, 0, batch); THLapack_(getrf)(n, n, THTensor_(data)(rai), lda, THIntTensor_data(rpivoti), info_ptr); if (rinfo_) { info_ptr++; } else if (info != 0) { break; } } THTensor_(free)(ai); 
THTensor_(free)(rai); THIntTensor_free(rpivoti); if (ra__ != ra_) { THTensor_(freeCopyTo)(ra__, ra_); } if (!rinfo_ && info != 0) { THError("failed to factorize batch element %ld (info == %d)", batch, info); } } void THTensor_(btrisolve)(THTensor *rb_, THTensor *b, THTensor *atf, THIntTensor *pivots) { THArgCheck(THTensor_(nDimension)(atf) == 3, 1, "expected 3D tensor, got %dD", THTensor_(nDimension)(atf)); THArgCheck(THTensor_(nDimension)(b) == 3 || THTensor_(nDimension)(b) == 2, 4, "expected 2D or 3D tensor"); THArgCheck(THTensor_(size)(atf, 0) == THTensor_(size)(b, 0), 3, "number of batches must be equal"); THArgCheck(THTensor_(size)(atf, 1) == THTensor_(size)(atf, 2), 3, "A matrices must be square"); THArgCheck(THTensor_(size)(atf, 1) == THTensor_(size)(b, 1), 3, "dimensions of A and b must be equal"); if (rb_ != b) { THTensor_(resizeAs)(rb_, b); THTensor_(copy)(rb_, b); } long num_batches = atf->size[0]; long n = atf->size[1]; int nrhs = rb_->nDimension > 2 ? rb_->size[2] : 1; int lda, ldb; THTensor *atf_; THTensor *rb__; // correct ordering of A if (atf->stride[1] == 1) { // column ordered, what BLAS wants lda = atf->stride[2]; atf_ = atf; } else { // not column ordered, need to make it such (requires copy) // it would be nice if we could use the op(A) flags to automatically // transpose A if needed, but this leads to unpredictable behavior if the // user clones A_tf later with a different ordering THTensor *transp_r_ = THTensor_(newTranspose)(atf, 1, 2); atf_ = THTensor_(newClone)(transp_r_); THTensor_(free)(transp_r_); THTensor_(transpose)(atf_, NULL, 1, 2); lda = atf_->stride[2]; } // correct ordering of B if (rb_->stride[1] == 1) { // column ordered if (rb_->nDimension == 2 || rb_->size[2] == 1) { ldb = n; } else { ldb = rb_->stride[2]; } rb__ = rb_; } else { // make column ordered if (rb_->nDimension > 2) { THTensor *transp_r_ = THTensor_(newTranspose)(rb_, 1, 2); rb__ = THTensor_(newClone)(transp_r_); THTensor_(free)(transp_r_); 
THTensor_(transpose)(rb__, NULL, 1, 2); ldb = rb__->stride[2]; } else { rb__ = THTensor_(newClone)(rb_); ldb = n; } } THTensor *ai = THTensor_(new)(); THTensor *rbi = THTensor_(new)(); THIntTensor *pivoti = THIntTensor_new(); if (!THIntTensor_isContiguous(pivots)) { THError("Error: rpivots_ is not contiguous."); } for (long batch = 0; batch < num_batches; ++batch) { THTensor_(select)(ai, atf_, 0, batch); THTensor_(select)(rbi, rb__, 0, batch); THIntTensor_select(pivoti, pivots, 0, batch); #if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) int info; THLapack_(getrs)('N', n, nrhs, THTensor_(data)(ai), lda, THIntTensor_data(pivoti), THTensor_(data)(rbi), ldb, &info); if (info != 0) { THError("Error: Nonzero info."); } #else THError("Unimplemented"); #endif } THTensor_(free)(ai); THTensor_(free)(rbi); THIntTensor_free(pivoti); if (atf_ != atf) { THTensor_(free)(atf_); } if (rb__ != rb_) { THTensor_(freeCopyTo)(rb__, rb_); } } #endif lib/TH/generic/THTensorLapack.h000066400000000000000000000034071316246254300165600ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THTensorLapack.h" #else TH_API void THTensor_(gesv)(THTensor *rb_, THTensor *ra_, THTensor *b_, THTensor *a_); TH_API void THTensor_(trtrs)(THTensor *rb_, THTensor *ra_, THTensor *b_, THTensor *a_, const char *uplo, const char *trans, const char *diag); TH_API void THTensor_(gels)(THTensor *rb_, THTensor *ra_, THTensor *b_, THTensor *a_); TH_API void THTensor_(syev)(THTensor *re_, THTensor *rv_, THTensor *a_, const char *jobz, const char *uplo); TH_API void THTensor_(geev)(THTensor *re_, THTensor *rv_, THTensor *a_, const char *jobvr); TH_API void THTensor_(gesvd)(THTensor *ru_, THTensor *rs_, THTensor *rv_, THTensor *a, const char *jobu); TH_API void THTensor_(gesvd2)(THTensor *ru_, THTensor *rs_, THTensor *rv_, THTensor *ra_, THTensor *a, const char *jobu); TH_API void THTensor_(getri)(THTensor *ra_, THTensor *a); TH_API void THTensor_(potrf)(THTensor *ra_, 
THTensor *a, const char *uplo); TH_API void THTensor_(potrs)(THTensor *rb_, THTensor *b_, THTensor *a_, const char *uplo); TH_API void THTensor_(potri)(THTensor *ra_, THTensor *a, const char *uplo); TH_API void THTensor_(qr)(THTensor *rq_, THTensor *rr_, THTensor *a); TH_API void THTensor_(geqrf)(THTensor *ra_, THTensor *rtau_, THTensor *a); TH_API void THTensor_(orgqr)(THTensor *ra_, THTensor *a, THTensor *tau); TH_API void THTensor_(ormqr)(THTensor *ra_, THTensor *a, THTensor *tau, THTensor *c, const char *side, const char *trans); TH_API void THTensor_(pstrf)(THTensor *ra_, THIntTensor *rpiv_, THTensor*a, const char* uplo, real tol); TH_API void THTensor_(btrifact)(THTensor *ra_, THIntTensor *rpivots_, THIntTensor *rinfo_, int pivot, THTensor *a); TH_API void THTensor_(btrisolve)(THTensor *rb_, THTensor *b, THTensor *atf, THIntTensor *pivots); #endif lib/TH/generic/THTensorMath.c000066400000000000000000003160351316246254300162550ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THTensorMath.c" #else #ifndef NAN #define NAN (nan(NULL)) #endif #ifdef _OPENMP #include #endif #define TH_OMP_OVERHEAD_THRESHOLD 100000 #ifdef _OPENMP #ifndef _WIN32 #define PRAGMA(P) _Pragma(#P) #else #define PRAGMA(P) __pragma(P) #endif #define TH_TENSOR_APPLY_CONTIG(TYPE, TENSOR, CODE) \ { \ ptrdiff_t TH_TENSOR_size = THTensor_(nElement)(TENSOR); \ PRAGMA(omp parallel if (TH_TENSOR_size > TH_OMP_OVERHEAD_THRESHOLD)) \ { \ size_t num_threads = omp_get_num_threads(); \ size_t tid = omp_get_thread_num(); \ ptrdiff_t TH_TENSOR_offset = tid * (TH_TENSOR_size / num_threads); \ ptrdiff_t TH_TENSOR_end = tid == num_threads - 1 ? 
TH_TENSOR_size : \ TH_TENSOR_offset + TH_TENSOR_size / num_threads; \ ptrdiff_t TENSOR##_len = TH_TENSOR_end - TH_TENSOR_offset; \ TYPE *TENSOR##_data = THTensor_(data)(TENSOR) + TH_TENSOR_offset; \ CODE \ } \ } #else #define TH_TENSOR_APPLY_CONTIG(TYPE, TENSOR, CODE) \ { \ TYPE *TENSOR##_data = THTensor_(data)(TENSOR); \ ptrdiff_t TENSOR##_len = THTensor_(nElement)(TENSOR); \ CODE \ } #endif #ifdef _OPENMP #define TH_TENSOR_APPLY2_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \ { \ ptrdiff_t TH_TENSOR_size = THTensor_(nElement)(TENSOR1); \ PRAGMA(omp parallel if (TH_TENSOR_size > TH_OMP_OVERHEAD_THRESHOLD)) \ { \ size_t num_threads = omp_get_num_threads(); \ size_t tid = omp_get_thread_num(); \ ptrdiff_t TH_TENSOR_offset = tid * (TH_TENSOR_size / num_threads); \ ptrdiff_t TH_TENSOR_end = tid == num_threads - 1 ? TH_TENSOR_size : \ TH_TENSOR_offset + TH_TENSOR_size / num_threads; \ ptrdiff_t TENSOR1##_len = TH_TENSOR_end - TH_TENSOR_offset; \ TYPE1 *TENSOR1##_data = THTensor_(data)(TENSOR1) + TH_TENSOR_offset; \ TYPE2 *TENSOR2##_data = THTensor_(data)(TENSOR2) + TH_TENSOR_offset; \ CODE \ } \ } #else #define TH_TENSOR_APPLY2_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \ { \ TYPE1 *TENSOR1##_data = THTensor_(data)(TENSOR1); \ TYPE2 *TENSOR2##_data = THTensor_(data)(TENSOR2); \ ptrdiff_t TENSOR1##_len = THTensor_(nElement)(TENSOR1); \ CODE \ } #endif #ifdef _OPENMP #define TH_TENSOR_APPLY3_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE) \ { \ ptrdiff_t TH_TENSOR_size = THTensor_(nElement)(TENSOR1); \ PRAGMA(omp parallel if (TH_TENSOR_size > TH_OMP_OVERHEAD_THRESHOLD)) \ { \ size_t num_threads = omp_get_num_threads(); \ size_t tid = omp_get_thread_num(); \ ptrdiff_t TH_TENSOR_offset = tid * (TH_TENSOR_size / num_threads); \ ptrdiff_t TH_TENSOR_end = tid == num_threads - 1 ? 
TH_TENSOR_size : \ TH_TENSOR_offset + TH_TENSOR_size / num_threads; \ ptrdiff_t TENSOR1##_len = TH_TENSOR_end - TH_TENSOR_offset; \ TYPE1 *TENSOR1##_data = THTensor_(data)(TENSOR1) + TH_TENSOR_offset; \ TYPE2 *TENSOR2##_data = THTensor_(data)(TENSOR2) + TH_TENSOR_offset; \ TYPE3 *TENSOR3##_data = THTensor_(data)(TENSOR3) + TH_TENSOR_offset; \ CODE \ } \ } #else #define TH_TENSOR_APPLY3_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE) \ { \ TYPE1 *TENSOR1##_data = THTensor_(data)(TENSOR1); \ TYPE2 *TENSOR2##_data = THTensor_(data)(TENSOR2); \ TYPE3 *TENSOR3##_data = THTensor_(data)(TENSOR3); \ ptrdiff_t TENSOR1##_len = THTensor_(nElement)(TENSOR1); \ CODE \ } #endif void THTensor_(fill)(THTensor *r_, real value) { if (THTensor_(isContiguous)(r_) || THTensor_(isTransposed)(r_)) { TH_TENSOR_APPLY_CONTIG(real, r_, THVector_(fill)(r__data, value, r__len);); } else { TH_TENSOR_APPLY(real, r_, if (r__stride == 1) { THVector_(fill)(r__data, value, r__size); r__i = r__size; r__data += r__stride * r__size; break; } else { *r__data = value; } ); } } void THTensor_(zero)(THTensor *r_) { THTensor_(fill)(r_, 0); } void THTensor_(maskedFill)(THTensor *tensor, THByteTensor *mask, real value) { TH_TENSOR_APPLY2(real, tensor, unsigned char, mask, if (*mask_data > 1) { THFree(mask_counter); THFree(tensor_counter); THError("Mask tensor can take 0 and 1 values only"); } else if (*mask_data == 1) { *tensor_data = value; }); } void THTensor_(maskedCopy)(THTensor *tensor, THByteTensor *mask, THTensor* src ) { THTensor *srct = THTensor_(newContiguous)(src); real *src_data = THTensor_(data)(srct); ptrdiff_t cntr = 0; ptrdiff_t nelem = THTensor_(nElement)(srct); if (THTensor_(nElement)(tensor) != THByteTensor_nElement(mask)) { THTensor_(free)(srct); THError("Number of elements of destination tensor != Number of elements in mask"); } TH_TENSOR_APPLY2(real, tensor, unsigned char, mask, if (*mask_data > 1) { THTensor_(free)(srct); THFree(mask_counter); THFree(tensor_counter); 
THError("Mask tensor can take 0 and 1 values only"); } else if (*mask_data == 1) { if (cntr == nelem) { THTensor_(free)(srct); THFree(mask_counter); THFree(tensor_counter); THError("Number of elements of src < number of ones in mask"); } *tensor_data = *src_data; src_data++; cntr++; }); THTensor_(free)(srct); } void THTensor_(maskedSelect)(THTensor *tensor, THTensor *src, THByteTensor *mask) { ptrdiff_t numel = THByteTensor_sumall(mask); real *tensor_data; #ifdef DEBUG THAssert(numel <= LONG_MAX); #endif THTensor_(resize1d)(tensor,numel); tensor_data = THTensor_(data)(tensor); TH_TENSOR_APPLY2(real, src, unsigned char, mask, if (*mask_data > 1) { THFree(mask_counter); THFree(src_counter); THError("Mask tensor can take 0 and 1 values only"); } else if (*mask_data == 1) { *tensor_data = *src_data; tensor_data++; }); } // Finds non-zero elements of a tensor and returns their subscripts void THTensor_(nonzero)(THLongTensor *subscript, THTensor *tensor) { ptrdiff_t numel = 0; long *subscript_data; long i = 0; long dim; long div = 1; #ifdef TH_REAL_IS_HALF #define IS_NONZERO(val) ((val.x & 0x7fff) != 0) #else #define IS_NONZERO(val) ((val)!=0) #endif /* First Pass to determine size of subscripts */ TH_TENSOR_APPLY(real, tensor, if IS_NONZERO(*tensor_data) { ++numel; }); #ifdef DEBUG THAssert(numel <= LONG_MAX); #endif THLongTensor_resize2d(subscript, numel, tensor->nDimension); /* Second pass populates subscripts */ subscript_data = THLongTensor_data(subscript); TH_TENSOR_APPLY(real, tensor, if IS_NONZERO(*tensor_data) { div = 1; for (dim = tensor->nDimension - 1; dim >= 0; dim--) { *(subscript_data + dim) = (i/div) % tensor->size[dim]; div *= tensor->size[dim]; } subscript_data += tensor->nDimension; } ++i;); } void THTensor_(indexSelect)(THTensor *tensor, THTensor *src, int dim, THLongTensor *index) { ptrdiff_t i, numel; THLongStorage *newSize; THTensor *tSlice, *sSlice; long *index_data; real *tensor_data, *src_data; THArgCheck(index->nDimension == 1, 3, "Index is 
supposed to be a vector"); THArgCheck(dim < src->nDimension, 4,"Indexing dim %d is out of bounds of tensor", dim + TH_INDEX_BASE); THArgCheck(src->nDimension > 0,2,"Source tensor is empty"); numel = THLongTensor_nElement(index); newSize = THLongStorage_newWithSize(src->nDimension); THLongStorage_rawCopy(newSize,src->size); #ifdef DEBUG THAssert(numel <= LONG_MAX); #endif newSize->data[dim] = numel; THTensor_(resize)(tensor,newSize,NULL); THLongStorage_free(newSize); index = THLongTensor_newContiguous(index); index_data = THLongTensor_data(index); if (dim == 0 && THTensor_(isContiguous)(src) && THTensor_(isContiguous)(tensor)) { tensor_data = THTensor_(data)(tensor); src_data = THTensor_(data)(src); ptrdiff_t rowsize = THTensor_(nElement)(src) / src->size[0]; // check that the indices are within range long max = src->size[0] - 1 + TH_INDEX_BASE; for (i=0; i max) { THLongTensor_free(index); THError("index out of range"); } } if (src->nDimension == 1) { #pragma omp parallel for if(numel > TH_OMP_OVERHEAD_THRESHOLD) private(i) for (i=0; i TH_OMP_OVERHEAD_THRESHOLD) private(i) for (i=0; inDimension == 1) { for (i=0; inDimension == 1, 3, "Index is supposed to be a vector"); THArgCheck(dim < src->nDimension, 4, "Indexing dim %d is out of bounds of tensor", dim + TH_INDEX_BASE); THArgCheck(numel == src->size[dim],4,"Number of indices should be equal to source:size(dim)"); index = THLongTensor_newContiguous(index); index_data = THLongTensor_data(index); if (tensor->nDimension > 1 ) { tSlice = THTensor_(new)(); sSlice = THTensor_(new)(); for (i=0; inDimension == 1, 3, "Index is supposed to be a vector"); THArgCheck(dim < src->nDimension, 4,"Indexing dim %d is out of bounds of tensor", dim + TH_INDEX_BASE); THArgCheck(numel == src->size[dim],4,"Number of indices should be equal to source:size(dim)"); index = THLongTensor_newContiguous(index); index_data = THLongTensor_data(index); if (tensor->nDimension > 1) { tSlice = THTensor_(new)(); sSlice = THTensor_(new)(); for (i=0; 
inDimension == 1, 3, "Index is supposed to be a vector"); THArgCheck(dim < tensor->nDimension, 4,"Indexing dim %d is out of bounds of tensor", dim + TH_INDEX_BASE); index = THLongTensor_newContiguous(index); index_data = THLongTensor_data(index); for (i=0; inDimension > 1) { tSlice = THTensor_(new)(); THTensor_(select)(tSlice, tensor,dim,index_data[i] - TH_INDEX_BASE); THTensor_(fill)(tSlice, val); THTensor_(free)(tSlice); } else { THTensor_(set1d)(tensor, index_data[i] - TH_INDEX_BASE, val); } } THLongTensor_free(index); } void THTensor_(gather)(THTensor *tensor, THTensor *src, int dim, THLongTensor *index) { long elems_per_row, i, idx; THArgCheck(THTensor_(nDimension)(src) == THTensor_(nDimension)(tensor), 2, "Input tensor must have same dimensions as output tensor"); THArgCheck(dim < THTensor_(nDimension)(tensor), 3, "Index dimension is out of bounds"); THArgCheck(THLongTensor_nDimension(index) == THTensor_(nDimension)(src), 4, "Index tensor must have same dimensions as input tensor"); elems_per_row = THLongTensor_size(index, dim); TH_TENSOR_DIM_APPLY3(real, tensor, real, src, long, index, dim, for (i = 0; i < elems_per_row; ++i) { idx = *(index_data + i*index_stride); if (idx < TH_INDEX_BASE || idx >= src_size + TH_INDEX_BASE) { THFree(TH_TENSOR_DIM_APPLY_counter); THError("Invalid index in gather"); } *(tensor_data + i*tensor_stride) = src_data[(idx - TH_INDEX_BASE) * src_stride]; }) } void THTensor_(scatter)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src) { long elems_per_row, i, idx; THArgCheck(dim < THTensor_(nDimension)(tensor), 2, "Index dimension is out of bounds"); THArgCheck(THLongTensor_nDimension(index) == THTensor_(nDimension)(tensor), 3, "Index tensor must have same dimensions as output tensor"); THArgCheck(THTensor_(nDimension)(src) == THTensor_(nDimension)(tensor), 4, "Input tensor must have same dimensions as output tensor"); elems_per_row = THLongTensor_size(index, dim); TH_TENSOR_DIM_APPLY3(real, tensor, real, src, long, index, 
dim, for (i = 0; i < elems_per_row; ++i) { idx = *(index_data + i*index_stride); if (idx < TH_INDEX_BASE || idx >= tensor_size + TH_INDEX_BASE) { THFree(TH_TENSOR_DIM_APPLY_counter); THError("Invalid index in scatter"); } tensor_data[(idx - TH_INDEX_BASE) * tensor_stride] = *(src_data + i*src_stride); }) } void THTensor_(scatterAdd)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src) { long elems_per_row, i, idx; THArgCheck(dim < THTensor_(nDimension)(tensor), 2, "Index dimension is out of bounds"); THArgCheck(THLongTensor_nDimension(index) == THTensor_(nDimension)(tensor), 3, "Index tensor must have same dimensions as output tensor"); THArgCheck(THTensor_(nDimension)(src) == THTensor_(nDimension)(tensor), 4, "Input tensor must have same dimensions as output tensor"); elems_per_row = THLongTensor_size(index, dim); TH_TENSOR_DIM_APPLY3(real, tensor, real, src, long, index, dim, for (i = 0; i < elems_per_row; ++i) { idx = *(index_data + i*index_stride); if (idx < TH_INDEX_BASE || idx >= tensor_size + TH_INDEX_BASE) { THFree(TH_TENSOR_DIM_APPLY_counter); THError("Invalid index in scatterAdd"); } tensor_data[(idx - TH_INDEX_BASE) * tensor_stride] += *(src_data + i*src_stride); }) } void THTensor_(scatterFill)(THTensor *tensor, int dim, THLongTensor *index, real val) { long elems_per_row, i, idx; THArgCheck(dim < THTensor_(nDimension)(tensor), 2, "Index dimension is out of bounds"); THArgCheck(THLongTensor_nDimension(index) == THTensor_(nDimension)(tensor), 3, "Index tensor must have same dimensions as output tensor"); elems_per_row = THLongTensor_size(index, dim); TH_TENSOR_DIM_APPLY2(real, tensor, long, index, dim, for (i = 0; i < elems_per_row; ++i) { idx = *(index_data + i*index_stride); if (idx < TH_INDEX_BASE || idx >= tensor_size + TH_INDEX_BASE) { THFree(TH_TENSOR_DIM_APPLY_counter); THError("Invalid index in scatter"); } tensor_data[(idx - TH_INDEX_BASE) * tensor_stride] = val; }) } accreal THTensor_(dot)(THTensor *tensor, THTensor *src) { accreal 
sum = 0; /* we use a trick here. careful with that. */ TH_TENSOR_APPLY2(real, tensor, real, src, long sz = (tensor_size-tensor_i < src_size-src_i ? tensor_size-tensor_i : src_size-src_i); sum += THBlas_(dot)(sz, src_data, src_stride, tensor_data, tensor_stride); tensor_i += sz; src_i += sz; tensor_data += sz*tensor_stride; src_data += sz*src_stride; break;); return sum; } #undef th_isnan #if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) #define th_isnan(val) \ (isnan(val)) #else #define th_isnan(val) (0) #endif #undef th_isnan_break #if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) #define th_isnan_break(val) \ if (isnan(val)) break; #else #define th_isnan_break(val) #endif real THTensor_(minall)(THTensor *tensor) { real theMin; real value; THArgCheck(tensor->nDimension > 0, 1, "tensor must have one dimension"); theMin = THTensor_(data)(tensor)[0]; TH_TENSOR_APPLY(real, tensor, value = *tensor_data; /* This is not the same as value= theMin)) { theMin = value; th_isnan_break(value) }); return theMin; } real THTensor_(maxall)(THTensor *tensor) { real theMax; real value; THArgCheck(tensor->nDimension > 0, 1, "tensor must have one dimension"); theMax = THTensor_(data)(tensor)[0]; TH_TENSOR_APPLY(real, tensor, value = *tensor_data; /* This is not the same as value>theMax in the case of NaNs */ if(!(value <= theMax)) { theMax = value; th_isnan_break(value) }); return theMax; } static void THTensor_(quickselectnoidx)(real *arr, long k, long elements, long stride); real THTensor_(medianall)(THTensor *tensor) { THArgCheck(tensor->nDimension > 0, 1, "tensor must have one dimension"); real theMedian; ptrdiff_t numel; long k; THTensor *temp_; real *temp__data; numel = THTensor_(nElement)(tensor); k = (numel-1) >> 1; temp_ = THTensor_(newClone)(tensor); temp__data = THTensor_(data)(temp_); THTensor_(quickselectnoidx)(temp__data, k, numel, 1); theMedian = temp__data[k]; THTensor_(free)(temp_); return theMedian; } accreal THTensor_(sumall)(THTensor *tensor) { 
accreal sum = 0; TH_TENSOR_APPLY(real, tensor, sum += *tensor_data;); return sum; } accreal THTensor_(prodall)(THTensor *tensor) { accreal prod = 1; TH_TENSOR_APPLY(real, tensor, prod *= *tensor_data;); return prod; } void THTensor_(add)(THTensor *r_, THTensor *t, real value) { THTensor_(resizeAs)(r_, t); if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(adds)(r__data, t_data, value, r__len);); } else { TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data + value;); } } void THTensor_(sub)(THTensor *r_, THTensor *t, real value) { THTensor_(add)(r_, t, -value); } void THTensor_(mul)(THTensor *r_, THTensor *t, real value) { THTensor_(resizeAs)(r_, t); if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(muls)(r__data, t_data, value, r__len);); } else { TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data * value;); } } void THTensor_(div)(THTensor *r_, THTensor *t, real value) { THTensor_(resizeAs)(r_, t); if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(divs)(r__data, t_data, value, r__len);); } else { TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data / value;); } } void THTensor_(lshift)(THTensor *r_, THTensor *t, real value) { #if defined(TH_REAL_IS_FLOAT) return THTensor_(mul)(r_, t, powf(2, value)); #elif defined(TH_REAL_IS_DOUBLE) return THTensor_(mul)(r_, t, pow(2, value)); #elif defined(TH_REAL_IS_HALF) return THError("lshift is not supported for torch.HalfTensor"); #else THTensor_(resizeAs)(r_, t); if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { real *tp = THTensor_(data)(t); real *rp = THTensor_(data)(r_); long sz = 
THTensor_(nElement)(t); long i; #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD * 100) private(i) for (i=0; i TH_OMP_OVERHEAD_THRESHOLD * 100) private(i) for (i=0; i> value; #else rp[i] = ((unsigned real) tp[i]) >> value; #endif } } else { #if defined(TH_REAL_IS_BYTE) TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (((real) *t_data) >> value);); #else TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (((unsigned real) *t_data) >> value);); #endif } #endif } void THTensor_(fmod)(THTensor *r_, THTensor *t, real value) { THTensor_(resizeAs)(r_, t); if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { real *tp = THTensor_(data)(t); real *rp = THTensor_(data)(r_); ptrdiff_t sz = THTensor_(nElement)(t); ptrdiff_t i; #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i) for (i=0; i TH_OMP_OVERHEAD_THRESHOLD) private(i) for (i=0; i TH_OMP_OVERHEAD_THRESHOLD * 100) private(i) for (i=0; i TH_OMP_OVERHEAD_THRESHOLD * 100) private(i) for (i=0; i TH_OMP_OVERHEAD_THRESHOLD * 100) private(i) for (i=0; i TH_OMP_OVERHEAD_THRESHOLD) private(i) for (i=0; i max_value ? max_value : tp[i]); } else { TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (*t_data < min_value) ? min_value : (*t_data > max_value ? 
max_value : *t_data);); } } void THTensor_(cadd)(THTensor *r_, THTensor *t, real value, THTensor *src) { THTensor_(resizeAs)(r_, t); if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(isContiguous)(src) && THTensor_(nElement)(r_) == THTensor_(nElement)(src)) { if(r_ == t) { THBlas_(axpy)(THTensor_(nElement)(t), value, THTensor_(data)(src), 1, THTensor_(data)(r_), 1); } else { TH_TENSOR_APPLY3_CONTIG(real, r_, real, t, real, src, THVector_(cadd)(r__data, t_data, src_data, value, r__len);); } } else { TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data + value * *src_data;); } } void THTensor_(csub)(THTensor *r_, THTensor *t, real value,THTensor *src) { THTensor_(cadd)(r_, t, -value, src); } void THTensor_(cmul)(THTensor *r_, THTensor *t, THTensor *src) { THTensor_(resizeAs)(r_, t); if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(isContiguous)(src) && THTensor_(nElement)(r_) == THTensor_(nElement)(src)) { TH_TENSOR_APPLY3_CONTIG(real, r_, real, t, real, src, THVector_(cmul)(r__data, t_data, src_data, r__len);); } else { TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data * *src_data;); } } void THTensor_(cpow)(THTensor *r_, THTensor *t, THTensor *src) { THTensor_(resizeAs)(r_, t); if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(isContiguous)(src) && THTensor_(nElement)(r_) == THTensor_(nElement)(src)) { real *tp = THTensor_(data)(t); real *sp = THTensor_(data)(src); real *rp = THTensor_(data)(r_); ptrdiff_t sz = THTensor_(nElement)(t); ptrdiff_t i; #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i) for (i=0; i TH_OMP_OVERHEAD_THRESHOLD) private(i) for (i=0; i TH_OMP_OVERHEAD_THRESHOLD) private(i) for (i=0; i> sp[i]; #else rp[i] = ((unsigned real) tp[i]) >> sp[i]; #endif } } else { #if defined(TH_REAL_IS_FLOAT) TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data / powf(2, *src_data);); #elif defined(TH_REAL_IS_DOUBLE) 
TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data / pow(2, *src_data);); #elif defined(TH_REAL_IS_BYTE) TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = ((real)*t_data) >> *src_data;); #else TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = ((unsigned real)*t_data) >> *src_data;); #endif } } void THTensor_(cfmod)(THTensor *r_, THTensor *t, THTensor *src) { THTensor_(resizeAs)(r_, t); if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(isContiguous)(src) && THTensor_(nElement)(r_) == THTensor_(nElement)(src)) { real *tp = THTensor_(data)(t); real *sp = THTensor_(data)(src); real *rp = THTensor_(data)(r_); ptrdiff_t sz = THTensor_(nElement)(t); ptrdiff_t i; #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i) for (i=0; i TH_OMP_OVERHEAD_THRESHOLD) private(i) for (i=0; i TH_OMP_OVERHEAD_THRESHOLD) private(i) for (i=0; i TH_OMP_OVERHEAD_THRESHOLD) private(i) for (i=0; i TH_OMP_OVERHEAD_THRESHOLD) private(i) for (i=0; i TH_OMP_OVERHEAD_THRESHOLD) private(i) for (i=0; inDimension != 2) || (vec->nDimension != 1) ) THError("matrix and vector expected, got %dD, %dD", mat->nDimension, vec->nDimension); if( mat->size[1] != vec->size[0] ) { THDescBuff bm = THTensor_(sizeDesc)(mat); THDescBuff bv = THTensor_(sizeDesc)(vec); THError("size mismatch, %s, %s", bm.str, bv.str); } if(t->nDimension != 1) THError("vector expected, got t: %dD", t->nDimension); if(t->size[0] != mat->size[0]) { THDescBuff bt = THTensor_(sizeDesc)(t); THDescBuff bm = THTensor_(sizeDesc)(mat); THError("size mismatch, t: %s, mat: %s", bt.str, bm.str); } if(r_ != t) { THTensor_(resizeAs)(r_, t); THTensor_(copy)(r_, t); } if(mat->stride[0] == 1) { THBlas_(gemv)('n', mat->size[0], mat->size[1], alpha, THTensor_(data)(mat), mat->stride[1], THTensor_(data)(vec), vec->stride[0], beta, THTensor_(data)(r_), r_->stride[0]); } else if(mat->stride[1] == 1) { THBlas_(gemv)('t', mat->size[1], mat->size[0], alpha, THTensor_(data)(mat), 
mat->stride[0], THTensor_(data)(vec), vec->stride[0], beta, THTensor_(data)(r_), r_->stride[0]); } else { THTensor *cmat = THTensor_(newContiguous)(mat); THBlas_(gemv)('t', mat->size[1], mat->size[0], alpha, THTensor_(data)(cmat), cmat->stride[0], THTensor_(data)(vec), vec->stride[0], beta, THTensor_(data)(r_), r_->stride[0]); THTensor_(free)(cmat); } } void THTensor_(match)(THTensor *r_, THTensor *m1, THTensor *m2, real gain) { long N1 = m1->size[0]; long N2 = m2->size[0]; long dim; real *m1_p; real *m2_p; real *r_p; long i; THTensor_(resize2d)(r_, N1, N2); m1 = THTensor_(newContiguous)(m1); m2 = THTensor_(newContiguous)(m2); THTensor_(resize2d)(m1, N1, THTensor_(nElement)(m1) / N1); THTensor_(resize2d)(m2, N2, THTensor_(nElement)(m2) / N2); dim = m1->size[1]; THArgCheck(m1->size[1] == m2->size[1], 3, "m1 and m2 must have the same inner vector dim"); m1_p = THTensor_(data)(m1); m2_p = THTensor_(data)(m2); r_p = THTensor_(data)(r_); #pragma omp parallel for private(i) for (i=0; inDimension != 2) || (m2->nDimension != 2)) THError("matrices expected, got %dD, %dD tensors", m1->nDimension, m2->nDimension); if(m1->size[1] != m2->size[0]) { THDescBuff bm1 = THTensor_(sizeDesc)(m1); THDescBuff bm2 = THTensor_(sizeDesc)(m2); THError("size mismatch, m1: %s, m2: %s", bm1.str, bm2.str); } if( t->nDimension != 2 ) THError("matrix expected, got %dD tensor for t", t->nDimension); if( (t->size[0] != m1->size[0]) || (t->size[1] != m2->size[1]) ) { THDescBuff bt = THTensor_(sizeDesc)(t); THDescBuff bm1 = THTensor_(sizeDesc)(m1); THDescBuff bm2 = THTensor_(sizeDesc)(m2); THError("size mismatch, t: %s, m1: %s, m2: %s", bt.str, bm1.str, bm2.str); } if(t != r_) { THTensor_(resizeAs)(r_, t); if (beta != 0.0) { THTensor_(copy)(r_, t); } } /* r_ */ if(r_->stride[0] == 1 && r_->stride[1] != 0) { transpose_r = 'n'; r__ = r_; } else if(r_->stride[1] == 1 && r_->stride[0] != 0) { THTensor *swap = m2; m2 = m1; m1 = swap; transpose_r = 't'; r__ = r_; } else { transpose_r = 'n'; THTensor 
*transp_r_ = THTensor_(newTranspose)(r_, 0, 1); r__ = THTensor_(newClone)(transp_r_); THTensor_(free)(transp_r_); THTensor_(transpose)(r__, NULL, 0, 1); } /* m1 */ if(m1->stride[(transpose_r == 'n' ? 0 : 1)] == 1 && m1->stride[(transpose_r == 'n' ? 1 : 0)] != 0) { transpose_m1 = 'n'; m1_ = m1; } else if(m1->stride[(transpose_r == 'n' ? 1 : 0)] == 1 && m1->stride[(transpose_r == 'n' ? 0 : 1)] != 0) { transpose_m1 = 't'; m1_ = m1; } else { transpose_m1 = (transpose_r == 'n' ? 't' : 'n'); m1_ = THTensor_(newContiguous)(m1); } /* m2 */ if(m2->stride[(transpose_r == 'n' ? 0 : 1)] == 1 && m2->stride[(transpose_r == 'n' ? 1 : 0)] != 0) { transpose_m2 = 'n'; m2_ = m2; } else if(m2->stride[(transpose_r == 'n' ? 1 : 0)] == 1 && m2->stride[(transpose_r == 'n' ? 0 : 1)] != 0) { transpose_m2 = 't'; m2_ = m2; } else { transpose_m2 = (transpose_r == 'n' ? 't' : 'n'); m2_ = THTensor_(newContiguous)(m2); } #pragma omp critical(blasgemm) /* do the operation */ THBlas_(gemm)(transpose_m1, transpose_m2, r__->size[(transpose_r == 'n' ? 0 : 1)], r__->size[(transpose_r == 'n' ? 1 : 0)], m1_->size[(transpose_r == 'n' ? 1 : 0)], alpha, THTensor_(data)(m1_), (transpose_m1 == 'n' ? m1_->stride[(transpose_r == 'n' ? 1 : 0)] : m1_->stride[(transpose_r == 'n' ? 0 : 1)]), THTensor_(data)(m2_), (transpose_m2 == 'n' ? m2_->stride[(transpose_r == 'n' ? 1 : 0)] : m2_->stride[(transpose_r == 'n' ? 0 : 1)]), beta, THTensor_(data)(r__), r__->stride[(transpose_r == 'n' ? 
1 : 0)]); /* free intermediate variables */ if(m1_ != m1) THTensor_(free)(m1_); if(m2_ != m2) THTensor_(free)(m2_); if(r__ != r_) THTensor_(freeCopyTo)(r__, r_); } void THTensor_(addr)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *vec1, THTensor *vec2) { if( (vec1->nDimension != 1) || (vec2->nDimension != 1) ) THError("vector and vector expected, got %dD, %dD tensors", vec1->nDimension, vec2->nDimension); if(t->nDimension != 2) THError("expected matrix, got %dD tensor for t", t->nDimension); if( (t->size[0] != vec1->size[0]) || (t->size[1] != vec2->size[0]) ) { THDescBuff bt = THTensor_(sizeDesc)(t); THDescBuff bv1 = THTensor_(sizeDesc)(vec1); THDescBuff bv2 = THTensor_(sizeDesc)(vec2); THError("size mismatch, t: %s, vec1: %s, vec2: %s", bt.str, bv1.str, bv2.str); } if(r_ != t) { THTensor_(resizeAs)(r_, t); THTensor_(copy)(r_, t); } if(beta == 0) { THTensor_(zero)(r_); } else if(beta != 1) THTensor_(mul)(r_, r_, beta); if(r_->stride[0] == 1) { THBlas_(ger)(vec1->size[0], vec2->size[0], alpha, THTensor_(data)(vec1), vec1->stride[0], THTensor_(data)(vec2), vec2->stride[0], THTensor_(data)(r_), r_->stride[1]); } else if(r_->stride[1] == 1) { THBlas_(ger)(vec2->size[0], vec1->size[0], alpha, THTensor_(data)(vec2), vec2->stride[0], THTensor_(data)(vec1), vec1->stride[0], THTensor_(data)(r_), r_->stride[0]); } else { THTensor *cr = THTensor_(newClone)(r_); THBlas_(ger)(vec2->size[0], vec1->size[0], alpha, THTensor_(data)(vec2), vec2->stride[0], THTensor_(data)(vec1), vec1->stride[0], THTensor_(data)(cr), cr->stride[0]); THTensor_(freeCopyTo)(cr, r_); } } void THTensor_(addbmm)(THTensor *result, real beta, THTensor *t, real alpha, THTensor *batch1, THTensor *batch2) { long batch; THArgCheck(THTensor_(nDimension)(batch1) == 3, 1, "expected 3D tensor"); THArgCheck(THTensor_(nDimension)(batch2) == 3, 2, "expected 3D tensor"); THArgCheck(THTensor_(size)(batch1, 0) == THTensor_(size)(batch2, 0), 2, "equal number of batches expected, got %d, %d", 
THTensor_(size)(batch1, 0), THTensor_(size)(batch2, 0)); THArgCheck(THTensor_(size)(batch1, 2) == THTensor_(size)(batch2, 1), 2, "wrong matrix size, batch1: %dx%d, batch2: %dx%d", THTensor_(size)(batch1, 1), THTensor_(size)(batch1,2), THTensor_(size)(batch2, 1), THTensor_(size)(batch2,2)); long dim1 = THTensor_(size)(batch1, 1); long dim2 = THTensor_(size)(batch2, 2); THArgCheck(THTensor_(size)(t, 0) == dim1, 1, "output tensor of incorrect size"); THArgCheck(THTensor_(size)(t, 1) == dim2, 1, "output tensor of incorrect size"); if (t != result) { THTensor_(resizeAs)(result, t); if (beta != 0.0) { THTensor_(copy)(result, t); } } THTensor *matrix1 = THTensor_(new)(); THTensor *matrix2 = THTensor_(new)(); for (batch = 0; batch < THTensor_(size)(batch1, 0); ++batch) { THTensor_(select)(matrix1, batch1, 0, batch); THTensor_(select)(matrix2, batch2, 0, batch); THTensor_(addmm)(result, beta, result, alpha, matrix1, matrix2); beta = 1; // accumulate output once } THTensor_(free)(matrix1); THTensor_(free)(matrix2); } void THTensor_(baddbmm)(THTensor *result, real beta, THTensor *t, real alpha, THTensor *batch1, THTensor *batch2) { long batch; THArgCheck(THTensor_(nDimension)(batch1) == 3, 1, "expected 3D tensor, got %dD", THTensor_(nDimension)(batch1)); THArgCheck(THTensor_(nDimension)(batch2) == 3, 2, "expected 3D tensor, got %dD", THTensor_(nDimension)(batch2)); THArgCheck(THTensor_(size)(batch1, 0) == THTensor_(size)(batch2, 0), 2, "equal number of batches expected, got %d, %d", THTensor_(size)(batch1, 0), THTensor_(size)(batch2, 0)); THArgCheck(THTensor_(size)(batch1, 2) == THTensor_(size)(batch2, 1), 2, "wrong matrix size, batch1: %dx%d, batch2: %dx%d", THTensor_(size)(batch1, 1), THTensor_(size)(batch1, 2), THTensor_(size)(batch2, 1), THTensor_(size)(batch2, 2)); long bs = THTensor_(size)(batch1, 0); long dim1 = THTensor_(size)(batch1, 1); long dim2 = THTensor_(size)(batch2, 2); THArgCheck(THTensor_(size)(t, 0) == bs, 1, "output tensor of incorrect size"); 
THArgCheck(THTensor_(size)(t, 1) == dim1, 1, "output tensor of incorrect size"); THArgCheck(THTensor_(size)(t, 2) == dim2, 1, "output tensor of incorrect size"); if (t != result) { THTensor_(resizeAs)(result, t); if (beta != 0.0) { THTensor_(copy)(result, t); } } THTensor *matrix1 = THTensor_(new)(); THTensor *matrix2 = THTensor_(new)(); THTensor *result_matrix = THTensor_(new)(); for (batch = 0; batch < THTensor_(size)(batch1, 0); ++batch) { THTensor_(select)(matrix1, batch1, 0, batch); THTensor_(select)(matrix2, batch2, 0, batch); THTensor_(select)(result_matrix, result, 0, batch); THTensor_(addmm)(result_matrix, beta, result_matrix, alpha, matrix1, matrix2); } THTensor_(free)(matrix1); THTensor_(free)(matrix2); THTensor_(free)(result_matrix); } ptrdiff_t THTensor_(numel)(THTensor *t) { return THTensor_(nElement)(t); } void THTensor_(max)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim) { THLongStorage *dim; THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range", dimension + TH_INDEX_BASE); dim = THTensor_(newSizeOf)(t); THLongStorage_set(dim, dimension, 1); THTensor_(resize)(values_, dim, NULL); THLongTensor_resize(indices_, dim, NULL); THLongStorage_free(dim); // two implementations optimized for data locality if (t->stride[dimension] == 1) { real theMax; real value; long theIndex; long i; TH_TENSOR_DIM_APPLY3(real, t, real, values_, long, indices_, dimension, theMax = t_data[0]; theIndex = 0; for(i = 0; i < t_size; i++) { value = t_data[i*t_stride]; /* This is not the same as value>theMax in the case of NaNs */ if(!(value <= theMax)) { theIndex = i; theMax = value; th_isnan_break(value) } } *indices__data = theIndex; *values__data = theMax;); } else { if (THTensor_(nDimension)(t) > 1) { THTensor *t0 = THTensor_(newSelect)(t, dimension, 0); THTensor_(copy)(values_, t0); THTensor_(free)(t0); } else { THTensor_(fill)(values_, THTensor_(get1d)(t, 0)); } THLongTensor_zero(indices_); 
if(t->size[dimension] == 1) { if (!keepdim) { THTensor_(squeeze1d)(values_, values_, dimension); THLongTensor_squeeze1d(indices_, indices_, dimension); } return; } THTensor *tempValues_ = THTensor_(newWithTensor)(values_); // tempValues_.expand_as(t) tempValues_->size[dimension] = t->size[dimension]; tempValues_->stride[dimension] = 0; THLongTensor *tempIndices_ = THLongTensor_newWithTensor(indices_); // tempIndices_.expand_as(t) tempIndices_->size[dimension] = t->size[dimension]; tempIndices_->stride[dimension] = 0; TH_TENSOR_APPLY3_D(real, t, real, tempValues_, long, tempIndices_, dimension, if(!(*t_data <= *tempValues__data) && !th_isnan(*tempValues__data)) { *tempValues__data = *t_data; *tempIndices__data = *tempIndices__dimOffset; }); THTensor_(free)(tempValues_); THLongTensor_free(tempIndices_); } if (!keepdim) { THTensor_(squeeze1d)(values_, values_, dimension); THLongTensor_squeeze1d(indices_, indices_, dimension); } } void THTensor_(min)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim) { THLongStorage *dim; THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range", dimension + TH_INDEX_BASE); dim = THTensor_(newSizeOf)(t); THLongStorage_set(dim, dimension, 1); THTensor_(resize)(values_, dim, NULL); THLongTensor_resize(indices_, dim, NULL); THLongStorage_free(dim); // two implementations optimized for data locality if (t->stride[dimension] == 1) { real theMax; real value; long theIndex; long i; TH_TENSOR_DIM_APPLY3(real, t, real, values_, long, indices_, dimension, theMax = t_data[0]; theIndex = 0; for(i = 0; i < t_size; i++) { value = t_data[i*t_stride]; /* This is not the same as value>theMax in the case of NaNs */ if(!(value >= theMax)) { theIndex = i; theMax = value; th_isnan_break(value) } } *indices__data = theIndex; *values__data = theMax;); } else { if (THTensor_(nDimension)(t) > 1) { THTensor *t0 = THTensor_(newSelect)(t, dimension, 0); THTensor_(copy)(values_, t0); 
THTensor_(free)(t0); } else { THTensor_(fill)(values_, THTensor_(get1d)(t, 0)); } THLongTensor_zero(indices_); if(t->size[dimension] == 1) { if (!keepdim) { THTensor_(squeeze1d)(values_, values_, dimension); THLongTensor_squeeze1d(indices_, indices_, dimension); } return; } THTensor *tempValues_ = THTensor_(newWithTensor)(values_); // tempValues_.expand_as(t) tempValues_->size[dimension] = t->size[dimension]; tempValues_->stride[dimension] = 0; THLongTensor *tempIndices_ = THLongTensor_newWithTensor(indices_); // tempIndices_.expand_as(t) tempIndices_->size[dimension] = t->size[dimension]; tempIndices_->stride[dimension] = 0; TH_TENSOR_APPLY3_D(real, t, real, tempValues_, long, tempIndices_, dimension, if(!(*t_data >= *tempValues__data) && !th_isnan(*tempValues__data)) { *tempValues__data = *t_data; *tempIndices__data = *tempIndices__dimOffset; }); } if (!keepdim) { THTensor_(squeeze1d)(values_, values_, dimension); THLongTensor_squeeze1d(indices_, indices_, dimension); } } void THTensor_(sum)(THTensor *r_, THTensor *t, int dimension, int keepdim) { THLongStorage *dim; THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range", dimension + TH_INDEX_BASE); dim = THTensor_(newSizeOf)(t); THLongStorage_set(dim, dimension, 1); THTensor_(resize)(r_, dim, NULL); THLongStorage_free(dim); // two implementations optimized for data locality if (t->stride[dimension] == 1) { TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension, accreal sum = 0; long i; for(i = 0; i < t_size; i++) sum += t_data[i*t_stride]; *r__data = (real)sum;); } else { THTensor_(zero)(r_); THTensor *temp_ = THTensor_(newWithTensor)(r_); // r_.expand_as(t) temp_->size[dimension] = t->size[dimension]; temp_->stride[dimension] = 0; TH_TENSOR_APPLY2(real, temp_, real, t, *temp__data = *temp__data + *t_data;); THTensor_(free)(temp_); } if (!keepdim) { THTensor_(squeeze1d)(r_, r_, dimension); } } void THTensor_(prod)(THTensor *r_, THTensor *t, int dimension, int keepdim) { 
THLongStorage *dim; THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range", dimension + TH_INDEX_BASE); dim = THTensor_(newSizeOf)(t); THLongStorage_set(dim, dimension, 1); THTensor_(resize)(r_, dim, NULL); THLongStorage_free(dim); // two implementations optimized for data locality if (t->stride[dimension] == 1) { TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension, accreal prod = 1; long i; for(i = 0; i < t_size; i++) prod *= t_data[i*t_stride]; *r__data = (real)prod;); } else { THTensor_(fill)(r_, 1); THTensor *temp_ = THTensor_(newWithTensor)(r_); // r_.expand_as(t) temp_->size[dimension] = t->size[dimension]; temp_->stride[dimension] = 0; TH_TENSOR_APPLY2(real, temp_, real, t, *temp__data = *temp__data * *t_data;); THTensor_(free)(temp_); } if (!keepdim) { THTensor_(squeeze1d)(r_, r_, dimension); } } void THTensor_(cumsum)(THTensor *r_, THTensor *t, int dimension) { THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range", dimension + TH_INDEX_BASE); THTensor_(resizeAs)(r_, t); TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension, accreal cumsum = 0; long i; for(i = 0; i < t_size; i++) { cumsum += t_data[i*t_stride]; r__data[i*r__stride] = (real)cumsum; }); } void THTensor_(cumprod)(THTensor *r_, THTensor *t, int dimension) { THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range", dimension + TH_INDEX_BASE); THTensor_(resizeAs)(r_, t); TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension, accreal cumprod = 1; long i; for(i = 0; i < t_size; i++) { cumprod *= t_data[i*t_stride]; r__data[i*r__stride] = (real)cumprod; }); } void THTensor_(sign)(THTensor *r_, THTensor *t) { THTensor_(resizeAs)(r_, t); #if defined (TH_REAL_IS_BYTE) TH_TENSOR_APPLY2(real, r_, real, t, if (*t_data > 0) *r__data = 1; else *r__data = 0;); #else TH_TENSOR_APPLY2(real, r_, real, t, if (*t_data > 0) *r__data = 1; else if (*t_data < 0) *r__data = -1; else *r__data = 0;); 
#endif } accreal THTensor_(trace)(THTensor *t) { real *t_data = THTensor_(data)(t); accreal sum = 0; long i = 0; long t_stride_0, t_stride_1, t_diag_size; THArgCheck(THTensor_(nDimension)(t) == 2, 1, "expected a matrix"); t_stride_0 = THTensor_(stride)(t, 0); t_stride_1 = THTensor_(stride)(t, 1); t_diag_size = THMin(THTensor_(size)(t, 0), THTensor_(size)(t, 1)); while(i < t_diag_size) { sum += t_data[i*(t_stride_0+t_stride_1)]; i++; } return sum; } void THTensor_(cross)(THTensor *r_, THTensor *a, THTensor *b, int dimension) { int i; if(THTensor_(nDimension)(a) != THTensor_(nDimension)(b)) THError("inconsistent tensor dimension %dD, %dD", THTensor_(nDimension)(a), THTensor_(nDimension)(b)); for(i = 0; i < THTensor_(nDimension)(a); i++) { if(THTensor_(size)(a, i) != THTensor_(size)(b, i)) { THDescBuff ba = THTensor_(sizeDesc)(a); THDescBuff bb = THTensor_(sizeDesc)(b); THError("inconsistent tensor sizes %s, %s", ba.str, bb.str); } } if(dimension < 0) { for(i = 0; i < THTensor_(nDimension)(a); i++) { if(THTensor_(size)(a, i) == 3) { dimension = i; break; } } if(dimension < 0) { THDescBuff ba = THTensor_(sizeDesc)(a); THError("no dimension of size 3 in a: %s", ba.str); } } THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(a), 3, "dimension %d out of range", dimension + TH_INDEX_BASE); THArgCheck(THTensor_(size)(a, dimension) == 3, 3, "dimension %d does not have size 3", dimension + TH_INDEX_BASE); THTensor_(resizeAs)(r_, a); TH_TENSOR_DIM_APPLY3(real, a, real, b, real, r_, dimension, r__data[0*r__stride] = a_data[1*a_stride]*b_data[2*b_stride] - a_data[2*a_stride]*b_data[1*b_stride]; r__data[1*r__stride] = a_data[2*a_stride]*b_data[0*b_stride] - a_data[0*a_stride]*b_data[2*b_stride]; r__data[2*r__stride] = a_data[0*a_stride]*b_data[1*b_stride] - a_data[1*a_stride]*b_data[0*b_stride];); } void THTensor_(cmax)(THTensor *r, THTensor *t, THTensor *src) { THTensor_(resizeAs)(r, t); TH_TENSOR_APPLY3(real, r, real, t, real, src, *r_data = *t_data > *src_data ? 
*t_data : *src_data;); } void THTensor_(cmin)(THTensor *r, THTensor *t, THTensor *src) { THTensor_(resizeAs)(r, t); TH_TENSOR_APPLY3(real, r, real, t, real, src, *r_data = *t_data < *src_data ? *t_data : *src_data;); } void THTensor_(cmaxValue)(THTensor *r, THTensor *t, real value) { THTensor_(resizeAs)(r, t); TH_TENSOR_APPLY2(real, r, real, t, *r_data = *t_data > value ? *t_data : value;); } void THTensor_(cminValue)(THTensor *r, THTensor *t, real value) { THTensor_(resizeAs)(r, t); TH_TENSOR_APPLY2(real, r, real, t, *r_data = *t_data < value ? *t_data : value;); } void THTensor_(zeros)(THTensor *r_, THLongStorage *size) { THTensor_(resize)(r_, size, NULL); THTensor_(zero)(r_); } void THTensor_(zerosLike)(THTensor *r_, THTensor *input) { THTensor_(resizeAs)(r_, input); THTensor_(zero)(r_); } void THTensor_(onesLike)(THTensor *r_, THTensor *input) { THTensor_(resizeAs)(r_, input); THTensor_(fill)(r_, 1); } void THTensor_(ones)(THTensor *r_, THLongStorage *size) { THTensor_(resize)(r_, size, NULL); THTensor_(fill)(r_, 1); } void THTensor_(diag)(THTensor *r_, THTensor *t, int k) { THArgCheck(THTensor_(nDimension)(t) == 1 || THTensor_(nDimension)(t) == 2, 1, "matrix or a vector expected"); if(THTensor_(nDimension)(t) == 1) { real *t_data = THTensor_(data)(t); long t_stride_0 = THTensor_(stride)(t, 0); long t_size = THTensor_(size)(t, 0); long sz = t_size + (k >= 0 ? k : -k); real *r__data; long r__stride_0; long r__stride_1; long i; THTensor_(resize2d)(r_, sz, sz); THTensor_(zero)(r_); r__data = THTensor_(data)(r_); r__stride_0 = THTensor_(stride)(r_, 0); r__stride_1 = THTensor_(stride)(r_, 1); r__data += (k >= 0 ? 
k*r__stride_1 : -k*r__stride_0); for(i = 0; i < t_size; i++) r__data[i*(r__stride_0+r__stride_1)] = t_data[i*t_stride_0]; } else { real *t_data = THTensor_(data)(t); long t_stride_0 = THTensor_(stride)(t, 0); long t_stride_1 = THTensor_(stride)(t, 1); long sz; real *r__data; long r__stride_0; long i; if(k >= 0) sz = THMin(THTensor_(size)(t, 0), THTensor_(size)(t, 1)-k); else sz = THMin(THTensor_(size)(t, 0)+k, THTensor_(size)(t, 1)); THTensor_(resize1d)(r_, sz); r__data = THTensor_(data)(r_); r__stride_0 = THTensor_(stride)(r_, 0); t_data += (k >= 0 ? k*t_stride_1 : -k*t_stride_0); for(i = 0; i < sz; i++) r__data[i*r__stride_0] = t_data[i*(t_stride_0+t_stride_1)]; } } void THTensor_(eye)(THTensor *r_, long n, long m) { real *r__data; long i, sz; THArgCheck(n > 0, 1, "invalid argument"); if(m <= 0) m = n; THTensor_(resize2d)(r_, n, m); THTensor_(zero)(r_); i = 0; r__data = THTensor_(data)(r_); sz = THMin(THTensor_(size)(r_, 0), THTensor_(size)(r_, 1)); for(i = 0; i < sz; i++) r__data[i*(r_->stride[0]+r_->stride[1])] = 1; } void THTensor_(range)(THTensor *r_, accreal xmin, accreal xmax, accreal step) { ptrdiff_t size; real i = 0; THArgCheck(step > 0 || step < 0, 3, "step must be a non-null number"); THArgCheck(((step > 0) && (xmax >= xmin)) || ((step < 0) && (xmax <= xmin)) , 2, "upper bound and larger bound incoherent with step sign"); size = (ptrdiff_t) (((xmax - xmin) / step) + 1); if (THTensor_(nElement)(r_) != size) { THTensor_(resize1d)(r_, size); } TH_TENSOR_APPLY(real, r_, *r__data = xmin + (i++)*step;); } void THTensor_(arange)(THTensor *r_, accreal xmin, accreal xmax, accreal step) { #if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) int m = fmod(xmax - xmin,step) == 0; #else int m = (xmax - xmin) % step == 0; #endif if (m) xmax -= step; THTensor_(range)(r_,xmin,xmax,step); } void THTensor_(randperm)(THTensor *r_, THGenerator *_generator, long n) { real *r__data; long r__stride_0; long i; THArgCheck(n > 0, 1, "must be strictly positive"); 
THTensor_(resize1d)(r_, n); r__data = THTensor_(data)(r_); r__stride_0 = THTensor_(stride)(r_,0); for(i = 0; i < n; i++) r__data[i*r__stride_0] = (real)(i); for(i = 0; i < n-1; i++) { long z = THRandom_random(_generator) % (n-i); real sav = r__data[i*r__stride_0]; r__data[i*r__stride_0] = r__data[(z+i)*r__stride_0]; r__data[(z+i)*r__stride_0] = sav; } } void THTensor_(reshape)(THTensor *r_, THTensor *t, THLongStorage *size) { THTensor_(resize)(r_, size, NULL); THTensor_(copy)(r_, t); } /* I cut and pasted (slightly adapted) the quicksort code from Sedgewick's 1978 "Implementing Quicksort Programs" article http://www.csie.ntu.edu.tw/~b93076/p847-sedgewick.pdf It is the state of the art existing implementation. The macros are here to make as close a match as possible to the pseudocode of Program 2 p.851 Note that other partition schemes exist, and are typically presented in textbook, but those are less efficient. See e.g. http://cs.stackexchange.com/questions/11458/quicksort-partitioning-hoare-vs-lomuto Julien, November 12th 2013 */ #define MAX_LEVELS 300 #define M_SMALL 10 /* Limit for small subfiles */ #define ARR(III) arr[(III)*stride] #define IDX(III) idx[(III)*stride] #define LONG_SWAP(AAA, BBB) swap = AAA; AAA = BBB; BBB = swap #define REAL_SWAP(AAA, BBB) rswap = AAA; AAA = BBB; BBB = rswap #define ARR_SWAP(III, JJJ) \ REAL_SWAP(ARR(III), ARR(JJJ)); #define BOTH_SWAP(III, JJJ) \ REAL_SWAP(ARR(III), ARR(JJJ)); \ LONG_SWAP(IDX(III), IDX(JJJ)) static void THTensor_(quicksortascend)(real *arr, long *idx, long elements, long stride) { long beg[MAX_LEVELS], end[MAX_LEVELS], i, j, L, R, P, swap, pid, stack = 0, sz_right, sz_left; real rswap, piv; unsigned char done = 0; /* beg[0]=0; end[0]=elements; */ stack = 0; L = 0; R = elements-1; done = elements-1 <= M_SMALL; while(!done) { /* Use median of three for pivot choice */ P=(L+R)>>1; BOTH_SWAP(P, L+1); if (ARR(L+1) > ARR(R)) { BOTH_SWAP(L+1, R); } if (ARR(L) > ARR(R)) { BOTH_SWAP(L, R); } if (ARR(L+1) > ARR(L)) { 
BOTH_SWAP(L+1, L); } i = L+1; j = R; piv = ARR(L); pid = IDX(L); do { do { i = i+1; } while(ARR(i) < piv); do { j = j-1; } while(ARR(j) > piv); if (j < i) break; BOTH_SWAP(i, j); } while(1); BOTH_SWAP(L, j); /* Left subfile is (L, j-1) */ /* Right subfile is (i, R) */ sz_left = j-L; sz_right = R-i+1; if (sz_left <= M_SMALL && sz_right <= M_SMALL) { /* both subfiles are small */ /* if stack empty */ if (stack == 0) { done = 1; } else { stack--; L = beg[stack]; R = end[stack]; } } else if (sz_left <= M_SMALL || sz_right <= M_SMALL) { /* exactly one of the subfiles is small */ /* (L,R) = large subfile */ if (sz_left > sz_right) { /* Implicit: L = L; */ R = j-1; } else { L = i; /* Implicit: R = R; */ } } else { /* none of the subfiles is small */ /* push large subfile */ /* (L,R) = small subfile */ if (sz_left > sz_right) { beg[stack] = L; end[stack] = j-1; stack++; L = i; /* Implicit: R = R */ } else { beg[stack] = i; end[stack] = R; stack++; /* Implicit: L = L; */ R = j-1; } } } /* while not done */ /* Now insertion sort on the concatenation of subfiles */ for(i=elements-2; i>=0; i--) { if (ARR(i) > ARR(i+1)) { piv = ARR(i); pid = IDX(i); j = i+1; do { ARR(j-1) = ARR(j); IDX(j-1) = IDX(j); j = j+1; } while(j < elements && ARR(j) < piv); ARR(j-1) = piv; IDX(j-1) = pid; } } } static void THTensor_(quicksortdescend)(real *arr, long *idx, long elements, long stride) { long beg[MAX_LEVELS], end[MAX_LEVELS], i, j, L, R, P, swap, pid, stack = 0, sz_right, sz_left; real rswap, piv; unsigned char done = 0; /* beg[0]=0; end[0]=elements; */ stack = 0; L = 0; R = elements-1; done = elements-1 <= M_SMALL; while(!done) { /* Use median of three for pivot choice */ P=(L+R)>>1; BOTH_SWAP(P, L+1); if (ARR(L+1) < ARR(R)) { BOTH_SWAP(L+1, R); } if (ARR(L) < ARR(R)) { BOTH_SWAP(L, R); } if (ARR(L+1) < ARR(L)) { BOTH_SWAP(L+1, L); } i = L+1; j = R; piv = ARR(L); pid = IDX(L); do { do { i = i+1; } while(ARR(i) > piv); do { j = j-1; } while(ARR(j) < piv); if (j < i) break; BOTH_SWAP(i, j); 
} while(1); BOTH_SWAP(L, j); /* Left subfile is (L, j-1) */ /* Right subfile is (i, R) */ sz_left = j-L; sz_right = R-i+1; if (sz_left <= M_SMALL && sz_right <= M_SMALL) { /* both subfiles are small */ /* if stack empty */ if (stack == 0) { done = 1; } else { stack--; L = beg[stack]; R = end[stack]; } } else if (sz_left <= M_SMALL || sz_right <= M_SMALL) { /* exactly one of the subfiles is small */ /* (L,R) = large subfile */ if (sz_left > sz_right) { /* Implicit: L = L; */ R = j-1; } else { L = i; /* Implicit: R = R; */ } } else { /* none of the subfiles is small */ /* push large subfile */ /* (L,R) = small subfile */ if (sz_left > sz_right) { beg[stack] = L; end[stack] = j-1; stack++; L = i; /* Implicit: R = R */ } else { beg[stack] = i; end[stack] = R; stack++; /* Implicit: L = L; */ R = j-1; } } } /* while not done */ /* Now insertion sort on the concatenation of subfiles */ for(i=elements-2; i>=0; i--) { if (ARR(i) < ARR(i+1)) { piv = ARR(i); pid = IDX(i); j = i+1; do { ARR(j-1) = ARR(j); IDX(j-1) = IDX(j); j = j+1; } while(j < elements && ARR(j) > piv); ARR(j-1) = piv; IDX(j-1) = pid; } } } #undef MAX_LEVELS #undef M_SMALL void THTensor_(sort)(THTensor *rt_, THLongTensor *ri_, THTensor *t, int dimension, int descendingOrder) { THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "invalid dimension %d", dimension + TH_INDEX_BASE); THTensor_(resizeAs)(rt_, t); THTensor_(copy)(rt_, t); { THLongStorage *size = THTensor_(newSizeOf)(t); THLongTensor_resize(ri_, size, NULL); THLongStorage_free(size); } if(descendingOrder) { TH_TENSOR_DIM_APPLY2(real, rt_, long, ri_, dimension, long i; for(i = 0; i < ri__size; i++) ri__data[i*ri__stride] = i; THTensor_(quicksortdescend)(rt__data, ri__data, rt__size, rt__stride);) } else { TH_TENSOR_DIM_APPLY2(real, rt_, long, ri_, dimension, long i; for(i = 0; i < ri__size; i++) ri__data[i*ri__stride] = i; THTensor_(quicksortascend)(rt__data, ri__data, rt__size, rt__stride);) } } /* Implementation of the Quickselect 
algorithm, based on Nicolas Devillard's public domain implementation at http://ndevilla.free.fr/median/median/ Adapted similarly to the above Quicksort algorithm. This version does not produce indices along with values. */ static void THTensor_(quickselectnoidx)(real *arr, long k, long elements, long stride) { long P, L, R, i, j, swap; real rswap, piv; L = 0; R = elements-1; do { if (R <= L) /* One element only */ return; if (R == L+1) { /* Two elements only */ if (ARR(L) > ARR(R)) { ARR_SWAP(L, R); } return; } /* Use median of three for pivot choice */ P=(L+R)>>1; ARR_SWAP(P, L+1); if (ARR(L+1) > ARR(R)) { ARR_SWAP(L+1, R); } if (ARR(L) > ARR(R)) { ARR_SWAP(L, R); } if (ARR(L+1) > ARR(L)) { ARR_SWAP(L+1, L); } i = L+1; j = R; piv = ARR(L); do { do i++; while(ARR(i) < piv); do j--; while(ARR(j) > piv); if (j < i) break; ARR_SWAP(i, j); } while(1); ARR_SWAP(L, j); /* Re-set active partition */ if (j <= k) L=i; if (j >= k) R=j-1; } while(1); } /* Implementation of the Quickselect algorithm, based on Nicolas Devillard's public domain implementation at http://ndevilla.free.fr/median/median/ Adapted similarly to the above Quicksort algorithm. 
*/ static void THTensor_(quickselect)(real *arr, long *idx, long k, long elements, long stride) { long P, L, R, i, j, swap, pid; real rswap, piv; L = 0; R = elements-1; do { if (R <= L) /* One element only */ return; if (R == L+1) { /* Two elements only */ if (ARR(L) > ARR(R)) { BOTH_SWAP(L, R); } return; } /* Use median of three for pivot choice */ P=(L+R)>>1; BOTH_SWAP(P, L+1); if (ARR(L+1) > ARR(R)) { BOTH_SWAP(L+1, R); } if (ARR(L) > ARR(R)) { BOTH_SWAP(L, R); } if (ARR(L+1) > ARR(L)) { BOTH_SWAP(L+1, L); } i = L+1; j = R; piv = ARR(L); pid = IDX(L); do { do i++; while(ARR(i) < piv); do j--; while(ARR(j) > piv); if (j < i) break; BOTH_SWAP(i, j); } while(1); BOTH_SWAP(L, j); /* Re-set active partition */ if (j <= k) L=i; if (j >= k) R=j-1; } while(1); } #undef ARR #undef IDX #undef LONG_SWAP #undef REAL_SWAP #undef BOTH_SWAP void THTensor_(mode)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim) { THLongStorage *dim; THTensor *temp_; THLongTensor *tempi_; real *temp__data; long *tempi__data; long t_size_dim; THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, "dimension out of range"); dim = THTensor_(newSizeOf)(t); THLongStorage_set(dim, dimension, 1); THTensor_(resize)(values_, dim, NULL); THLongTensor_resize(indices_, dim, NULL); THLongStorage_free(dim); t_size_dim = THTensor_(size)(t, dimension); temp_ = THTensor_(new)(); THTensor_(resize1d)(temp_, t_size_dim); temp__data = THTensor_(data)(temp_); tempi_ = THLongTensor_new(); THLongTensor_resize1d(tempi_, t_size_dim); tempi__data = THLongTensor_data(tempi_); TH_TENSOR_DIM_APPLY3(real, t, real, values_, long, indices_, dimension, long i; real mode = 0; long modei = 0; long temp_freq = 0; long max_freq = 0; for(i = 0; i < t_size_dim; i++) temp__data[i] = t_data[i*t_stride]; for(i = 0; i < t_size_dim; i++) tempi__data[i] = i; THTensor_(quicksortascend)(temp__data, tempi__data, t_size_dim, 1); for(i = 0; i < t_size_dim; i++) { temp_freq++; if ((i == t_size_dim 
- 1) || (temp__data[i] != temp__data[i+1])) { if (temp_freq > max_freq) { mode = temp__data[i]; modei = tempi__data[i]; max_freq = temp_freq; } temp_freq = 0; } } *values__data = mode; *indices__data = modei;); THTensor_(free)(temp_); THLongTensor_free(tempi_); if (!keepdim) { THTensor_(squeeze1d)(values_, values_, dimension); THLongTensor_squeeze1d(indices_, indices_, dimension); } } void THTensor_(kthvalue)(THTensor *values_, THLongTensor *indices_, THTensor *t, long k, int dimension, int keepdim) { THLongStorage *dim; THTensor *temp_; THLongTensor *tempi_; real *temp__data; long *tempi__data; long t_size_dim; THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, "dimension out of range"); THArgCheck(k > 0 && k <= t->size[dimension], 2, "selected index out of range"); dim = THTensor_(newSizeOf)(t); THLongStorage_set(dim, dimension, 1); THTensor_(resize)(values_, dim, NULL); THLongTensor_resize(indices_, dim, NULL); THLongStorage_free(dim); t_size_dim = THTensor_(size)(t, dimension); temp_ = THTensor_(new)(); THTensor_(resize1d)(temp_, t_size_dim); temp__data = THTensor_(data)(temp_); tempi_ = THLongTensor_new(); THLongTensor_resize1d(tempi_, t_size_dim); tempi__data = THLongTensor_data(tempi_); TH_TENSOR_DIM_APPLY3(real, t, real, values_, long, indices_, dimension, long i; for(i = 0; i < t_size_dim; i++) temp__data[i] = t_data[i*t_stride]; for(i = 0; i < t_size_dim; i++) tempi__data[i] = i; THTensor_(quickselect)(temp__data, tempi__data, k - 1, t_size_dim, 1); *values__data = temp__data[k-1]; *indices__data = tempi__data[k-1];); THTensor_(free)(temp_); THLongTensor_free(tempi_); if (!keepdim) { THTensor_(squeeze1d)(values_, values_, dimension); THLongTensor_squeeze1d(indices_, indices_, dimension); } } void THTensor_(median)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim) { long t_size_dim, k; THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, "dimension out of range"); t_size_dim = 
THTensor_(size)(t, dimension); k = (t_size_dim-1) >> 1; /* take middle or one-before-middle element */ THTensor_(kthvalue)(values_, indices_, t, k+1, dimension, keepdim); } void THTensor_(topk)(THTensor *rt_, THLongTensor *ri_, THTensor *t, long k, int dim, int dir, int sorted) { int numDims = THTensor_(nDimension)(t); THArgCheck(dim >= 0 && dim < numDims, 3, "dim not in range"); long sliceSize = THTensor_(size)(t, dim); THArgCheck(k > 0 && k <= sliceSize, 2, "k not in range for dimension"); THTensor *tmpResults = THTensor_(new)(); THTensor_(resize1d)(tmpResults, sliceSize); real *tmp__data = THTensor_(data)(tmpResults); THLongTensor *tmpIndices = THLongTensor_new(); THLongTensor_resize1d(tmpIndices, sliceSize); long *tmpi__data = THLongTensor_data(tmpIndices); THLongStorage *topKSize = THTensor_(newSizeOf)(t); THLongStorage_set(topKSize, dim, k); THTensor_(resize)(rt_, topKSize, NULL); THLongTensor_resize(ri_, topKSize, NULL); THLongStorage_free(topKSize); if (dir) { /* k largest elements, descending order (optional: see sorted) */ long K = sliceSize - k; TH_TENSOR_DIM_APPLY3(real, t, real, rt_, long, ri_, dim, long i; for(i = 0; i < sliceSize; i++) { tmp__data[i] = t_data[i*t_stride]; tmpi__data[i] = i; } if (K > 0) THTensor_(quickselect)(tmp__data, tmpi__data, K - 1, sliceSize, 1); if (sorted) THTensor_(quicksortdescend)(tmp__data + K, tmpi__data + K, k, 1); for(i = 0; i < k; i++) { rt__data[i*rt__stride] = tmp__data[i + K]; ri__data[i*ri__stride] = tmpi__data[i + K]; }) } else { /* k smallest elements, ascending order (optional: see sorted) */ TH_TENSOR_DIM_APPLY3(real, t, real, rt_, long, ri_, dim, long i; for(i = 0; i < sliceSize; i++) { tmp__data[i] = t_data[i*t_stride]; tmpi__data[i] = i; } THTensor_(quickselect)(tmp__data, tmpi__data, k - 1, sliceSize, 1); if (sorted) THTensor_(quicksortascend)(tmp__data, tmpi__data, k - 1, 1); for(i = 0; i < k; i++) { rt__data[i*rt__stride] = tmp__data[i]; ri__data[i*ri__stride] = tmpi__data[i]; }) } 
THTensor_(free)(tmpResults); THLongTensor_free(tmpIndices); } void THTensor_(tril)(THTensor *r_, THTensor *t, long k) { long t_size_0, t_size_1; long t_stride_0, t_stride_1; long r__stride_0, r__stride_1; real *t_data, *r__data; long r, c; THArgCheck(THTensor_(nDimension)(t) == 2, 1, "expected a matrix"); THTensor_(resizeAs)(r_, t); t_size_0 = THTensor_(size)(t, 0); t_size_1 = THTensor_(size)(t, 1); t_stride_0 = THTensor_(stride)(t, 0); t_stride_1 = THTensor_(stride)(t, 1); r__stride_0 = THTensor_(stride)(r_, 0); r__stride_1 = THTensor_(stride)(r_, 1); r__data = THTensor_(data)(r_); t_data = THTensor_(data)(t); for(r = 0; r < t_size_0; r++) { long sz = THMin(r+k+1, t_size_1); for(c = THMax(0, r+k+1); c < t_size_1; c++) r__data[r*r__stride_0+c*r__stride_1] = 0; for(c = 0; c < sz; c++) r__data[r*r__stride_0+c*r__stride_1] = t_data[r*t_stride_0+c*t_stride_1]; } } void THTensor_(triu)(THTensor *r_, THTensor *t, long k) { long t_size_0, t_size_1; long t_stride_0, t_stride_1; long r__stride_0, r__stride_1; real *t_data, *r__data; long r, c; THArgCheck(THTensor_(nDimension)(t) == 2, 1, "expected a matrix"); THTensor_(resizeAs)(r_, t); t_size_0 = THTensor_(size)(t, 0); t_size_1 = THTensor_(size)(t, 1); t_stride_0 = THTensor_(stride)(t, 0); t_stride_1 = THTensor_(stride)(t, 1); r__stride_0 = THTensor_(stride)(r_, 0); r__stride_1 = THTensor_(stride)(r_, 1); r__data = THTensor_(data)(r_); t_data = THTensor_(data)(t); for(r = 0; r < t_size_0; r++) { long sz = THMin(r+k, t_size_1); for(c = THMax(0, r+k); c < t_size_1; c++) r__data[r*r__stride_0+c*r__stride_1] = t_data[r*t_stride_0+c*t_stride_1]; for(c = 0; c < sz; c++) r__data[r*r__stride_0+c*r__stride_1] = 0; } } void THTensor_(cat)(THTensor *r_, THTensor *ta, THTensor *tb, int dimension) { THTensor* inputs[2]; inputs[0] = ta; inputs[1] = tb; THTensor_(catArray)(r_, inputs, 2, dimension); } void THTensor_(catArray)(THTensor *result, THTensor **inputs, int numInputs, int dimension) { THLongStorage *size; int i, j; long offset; 
int maxDim = dimension + 1; int allEmpty = 1; int allContiguous = 1; // cat_dimension is the actual dimension we cat along int cat_dimension = dimension; for (i = 0; i < numInputs; i++) { maxDim = THMax(maxDim, inputs[i]->nDimension); } // When the user input dimension is -1 (i.e. -2 in C) // Then we pick the maximum last dimension across all tensors. if ( dimension + TH_INDEX_BASE == -1 ) { cat_dimension = maxDim?(maxDim-1):0; } THArgCheck(numInputs > 0, 3, "invalid number of inputs %d", numInputs); THArgCheck(cat_dimension >= 0, 4, "invalid dimension %d", dimension + TH_INDEX_BASE); size = THLongStorage_newWithSize(maxDim); for(i = 0; i < maxDim; i++) { // dimSize is either the size of the dim if it exists, either 1 if #dim > 0, otherwise 0 long dimSize = i < inputs[0]->nDimension ? inputs[0]->size[i] : THMin(inputs[0]->nDimension, 1); if (i == cat_dimension) { for (j = 1; j < numInputs; j++) { // accumulate the size over the dimension we want to cat on. // Empty tensors are allowed dimSize += i < inputs[j]->nDimension ? inputs[j]->size[i] : THMin(inputs[j]->nDimension, 1); } } else { for (j = 1; j < numInputs; j++) { long sz = (i < inputs[j]->nDimension ? 
inputs[j]->size[i] : THMin(inputs[j]->nDimension, 1)); // If it's a dimension we're not catting on // Then fail if sizes are different AND > 0 if (dimSize != sz && dimSize && sz) { THLongStorage_free(size); THError("inconsistent tensor sizes"); } else if(!dimSize) { dimSize = sz; } } } allEmpty = allEmpty && !dimSize; size->data[i] = dimSize; } // Initiate catting and resizing // If at least one of the input is not empty if (!allEmpty) { THTensor_(resize)(result, size, NULL); // Check contiguity of all inputs and result for (i = 0; i < numInputs; i++) { if(inputs[i]->nDimension) { allContiguous = allContiguous && THTensor_(isContiguous)(inputs[i]); } } allContiguous = allContiguous && THTensor_(isContiguous)(result); // First path is for contiguous inputs along dim 1 // Second path for non-contiguous if (cat_dimension == 0 && allContiguous) { real* result_data = result->storage->data + result->storageOffset; offset = 0; for (j = 0; j < numInputs; j++) { if (inputs[j]->nDimension) { THTensor* input0 = inputs[j]; real* input0_data = input0->storage->data + input0->storageOffset; long input0_size = THTensor_(nElement)(input0); memcpy(result_data + offset, input0_data, input0_size*sizeof(real)); offset += input0_size; } } } else { offset = 0; for (j = 0; j < numInputs; j++) { if (inputs[j]->nDimension) { long dimSize = cat_dimension < inputs[j]->nDimension ? 
inputs[j]->size[cat_dimension] : 1; THTensor *nt = THTensor_(newWithTensor)(result); THTensor_(narrow)(nt, NULL, cat_dimension, offset, dimSize); THTensor_(copy)(nt, inputs[j]); THTensor_(free)(nt); offset += dimSize; } } } } THLongStorage_free(size); } int THTensor_(equal)(THTensor *ta, THTensor* tb) { int equal = 1; if(!THTensor_(isSameSizeAs)(ta, tb)) return 0; if (THTensor_(isContiguous)(ta) && THTensor_(isContiguous)(tb)) { real *tap = THTensor_(data)(ta); real *tbp = THTensor_(data)(tb); ptrdiff_t sz = THTensor_(nElement)(ta); ptrdiff_t i; for (i=0; inDimension, t->size, NULL); \ TH_TENSOR_APPLY2(unsigned char, r_, real, t, \ *r__data = (*t_data OP value) ? 1 : 0;); \ } \ void THTensor_(NAME##ValueT)(THTensor* r_, THTensor* t, real value) \ { \ THTensor_(resizeNd)(r_, t->nDimension, t->size, NULL); \ TH_TENSOR_APPLY2(real, r_, real, t, \ *r__data = (*t_data OP value) ? 1 : 0;); \ } \ void THTensor_(NAME##Tensor)(THByteTensor *r_, THTensor *ta, THTensor *tb) \ { \ THByteTensor_resizeNd(r_, ta->nDimension, ta->size, NULL); \ TH_TENSOR_APPLY3(unsigned char, r_, real, ta, real, tb, \ *r__data = (*ta_data OP *tb_data) ? 1 : 0;); \ } \ void THTensor_(NAME##TensorT)(THTensor *r_, THTensor *ta, THTensor *tb) \ { \ THTensor_(resizeNd)(r_, ta->nDimension, ta->size, NULL); \ TH_TENSOR_APPLY3(real, r_, real, ta, real, tb, \ *r__data = (*ta_data OP *tb_data) ? 
1 : 0;); \ } \ TENSOR_IMPLEMENT_LOGICAL(lt,<) TENSOR_IMPLEMENT_LOGICAL(gt,>) TENSOR_IMPLEMENT_LOGICAL(le,<=) TENSOR_IMPLEMENT_LOGICAL(ge,>=) TENSOR_IMPLEMENT_LOGICAL(eq,==) TENSOR_IMPLEMENT_LOGICAL(ne,!=) #define LAB_IMPLEMENT_BASIC_FUNCTION(NAME, CFUNC) \ void THTensor_(NAME)(THTensor *r_, THTensor *t) \ { \ THTensor_(resizeAs)(r_, t); \ TH_TENSOR_APPLY2(real, t, real, r_, *r__data = CFUNC(*t_data);); \ } \ #if defined(TH_REAL_IS_LONG) LAB_IMPLEMENT_BASIC_FUNCTION(abs,labs) LAB_IMPLEMENT_BASIC_FUNCTION(neg,-) #endif /* long only part */ #if defined(TH_REAL_IS_SHORT) || defined(TH_REAL_IS_INT) LAB_IMPLEMENT_BASIC_FUNCTION(abs,abs) LAB_IMPLEMENT_BASIC_FUNCTION(neg,-) #endif /* int only part */ #if defined(TH_REAL_IS_BYTE) #define TENSOR_IMPLEMENT_LOGICAL_SUM(NAME, OP, INIT_VALUE) \ int THTensor_(NAME)(THTensor *tensor) \ { \ THArgCheck(tensor->nDimension > 0, 1, "empty Tensor"); \ int sum = INIT_VALUE; \ TH_TENSOR_APPLY(real, tensor, sum = sum OP *tensor_data;); \ return sum; \ } TENSOR_IMPLEMENT_LOGICAL_SUM(logicalall, &&, 1) TENSOR_IMPLEMENT_LOGICAL_SUM(logicalany, ||, 0) #endif /* Byte only part */ /* floating point only now */ #if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) #if defined (TH_REAL_IS_FLOAT) #define TH_MATH_NAME(fn) fn##f #else #define TH_MATH_NAME(fn) fn #endif LAB_IMPLEMENT_BASIC_FUNCTION(log,TH_MATH_NAME(log)) LAB_IMPLEMENT_BASIC_FUNCTION(lgamma,TH_MATH_NAME(lgamma)) LAB_IMPLEMENT_BASIC_FUNCTION(log1p,TH_MATH_NAME(log1p)) LAB_IMPLEMENT_BASIC_FUNCTION(sigmoid,TH_MATH_NAME(TH_sigmoid)) LAB_IMPLEMENT_BASIC_FUNCTION(exp,TH_MATH_NAME(exp)) LAB_IMPLEMENT_BASIC_FUNCTION(cos,TH_MATH_NAME(cos)) LAB_IMPLEMENT_BASIC_FUNCTION(acos,TH_MATH_NAME(acos)) LAB_IMPLEMENT_BASIC_FUNCTION(cosh,TH_MATH_NAME(cosh)) LAB_IMPLEMENT_BASIC_FUNCTION(sin,TH_MATH_NAME(sin)) LAB_IMPLEMENT_BASIC_FUNCTION(asin,TH_MATH_NAME(asin)) LAB_IMPLEMENT_BASIC_FUNCTION(sinh,TH_MATH_NAME(sinh)) LAB_IMPLEMENT_BASIC_FUNCTION(tan,TH_MATH_NAME(tan)) 
LAB_IMPLEMENT_BASIC_FUNCTION(atan,TH_MATH_NAME(atan)) LAB_IMPLEMENT_BASIC_FUNCTION(tanh,TH_MATH_NAME(tanh)) LAB_IMPLEMENT_BASIC_FUNCTION(sqrt,TH_MATH_NAME(sqrt)) LAB_IMPLEMENT_BASIC_FUNCTION(rsqrt,TH_MATH_NAME(TH_rsqrt)) LAB_IMPLEMENT_BASIC_FUNCTION(ceil,TH_MATH_NAME(ceil)) LAB_IMPLEMENT_BASIC_FUNCTION(floor,TH_MATH_NAME(floor)) LAB_IMPLEMENT_BASIC_FUNCTION(round,TH_MATH_NAME(round)) LAB_IMPLEMENT_BASIC_FUNCTION(abs,TH_MATH_NAME(fabs)) LAB_IMPLEMENT_BASIC_FUNCTION(trunc,TH_MATH_NAME(trunc)) LAB_IMPLEMENT_BASIC_FUNCTION(frac,TH_MATH_NAME(TH_frac)) LAB_IMPLEMENT_BASIC_FUNCTION(neg,-) LAB_IMPLEMENT_BASIC_FUNCTION(cinv, TH_MATH_NAME(1.0) / ) void THTensor_(pow)(THTensor *r_, THTensor *t, real value) { THTensor_(resizeAs)(r_, t); if(value == 1){ THTensor_(copy)(r_, t); } else if(value == 2){ THTensor_(cmul)(r_, t, t); } else if(value == 3){ TH_TENSOR_APPLY2(real, t, real, r_, *r__data = *t_data * *t_data * *t_data;); } else if(value == 0.5){ THTensor_(sqrt)(r_, t); } else if(value == -0.5){ THTensor_(rsqrt)(r_, t); } else if(value == -1){ THTensor_(cinv)(r_, t); } else if(value == -2){ TH_TENSOR_APPLY2(real, t, real, r_, *r__data = TH_MATH_NAME(1.0) / (*t_data * *t_data);); } else{ TH_TENSOR_APPLY2(real, t, real, r_, *r__data = TH_MATH_NAME(pow)(*t_data, value);); } } void THTensor_(atan2)(THTensor *r_, THTensor *tx, THTensor *ty) { THTensor_(resizeAs)(r_, tx); TH_TENSOR_APPLY3(real, r_, real, tx, real, ty, *r__data = TH_MATH_NAME(atan2)(*tx_data,*ty_data);); } void THTensor_(lerp)(THTensor *r_, THTensor *a, THTensor *b, real weight) { THArgCheck(THTensor_(nElement)(a) == THTensor_(nElement)(b), 2, "sizes do not match"); THTensor_(resizeAs)(r_, a); TH_TENSOR_APPLY3(real, r_, real, a, real, b, *r__data = TH_MATH_NAME(TH_lerp)(*a_data, *b_data, weight);); } void THTensor_(mean)(THTensor *r_, THTensor *t, int dimension, int keepdim) { THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "invalid dimension %d", dimension + TH_INDEX_BASE); THTensor_(sum)(r_, 
t, dimension, keepdim); THTensor_(div)(r_, r_, t->size[dimension]); } void THTensor_(std)(THTensor *r_, THTensor *t, int dimension, int biased, int keepdim) { THLongStorage *dim; THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, "invalid dimension %d", dimension + TH_INDEX_BASE); dim = THTensor_(newSizeOf)(t); THLongStorage_set(dim, dimension, 1); THTensor_(resize)(r_, dim, NULL); THLongStorage_free(dim); TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension, accreal sum = 0; accreal sum2 = 0; long i; for(i = 0; i < t_size; i++) { real z = t_data[i*t_stride]; sum += z; sum2 += z*z; } if(biased) { sum /= t_size; sum2 /= t_size; sum2 -= sum*sum; sum2 = (sum2 < 0 ? 0 : sum2); *r__data = (real)TH_MATH_NAME(sqrt)(sum2); } else { sum /= t_size; sum2 /= t_size-1; sum2 -= ((real)t_size)/((real)(t_size-1))*sum*sum; sum2 = (sum2 < 0 ? 0 : sum2); *r__data = (real)TH_MATH_NAME(sqrt)(sum2); }); if (!keepdim) { THTensor_(squeeze1d)(r_, r_, dimension); } } void THTensor_(var)(THTensor *r_, THTensor *t, int dimension, int biased, int keepdim) { THLongStorage *dim; THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, "invalid dimension %d", dimension + TH_INDEX_BASE); dim = THTensor_(newSizeOf)(t); THLongStorage_set(dim, dimension, 1); THTensor_(resize)(r_, dim, NULL); THLongStorage_free(dim); TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension, accreal sum = 0; accreal sum2 = 0; long i; for(i = 0; i < t_size; i++) { real z = t_data[i*t_stride]; sum += z; sum2 += z*z; } if(biased) { sum /= t_size; sum2 /= t_size; sum2 -= sum*sum; sum2 = (sum2 < 0 ? 0 : sum2); *r__data = sum2; } else { sum /= t_size; sum2 /= t_size-1; sum2 -= ((real)t_size)/((real)(t_size-1))*sum*sum; sum2 = (sum2 < 0 ? 
0 : sum2); *r__data = (real)sum2; }); if (!keepdim) { THTensor_(squeeze1d)(r_, r_, dimension); } } void THTensor_(norm)(THTensor *r_, THTensor *t, real value, int dimension, int keepdim) { THLongStorage *dim; THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, "invalid dimension %d", dimension + TH_INDEX_BASE); dim = THTensor_(newSizeOf)(t); THLongStorage_set(dim, dimension, 1); THTensor_(resize)(r_, dim, NULL); THLongStorage_free(dim); if(value == 0) { TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension, accreal sum = 0; long i; for(i = 0; i < t_size; i++) sum += t_data[i*t_stride] != 0.0; *r__data = sum;) } else { TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension, accreal sum = 0; long i; for(i = 0; i < t_size; i++) { sum += TH_MATH_NAME(pow)( TH_MATH_NAME(fabs)(t_data[i*t_stride]), value); } *r__data = TH_MATH_NAME(pow)(sum, 1.0/value);) } if (!keepdim) { THTensor_(squeeze1d)(r_, r_, dimension); } } accreal THTensor_(normall)(THTensor *tensor, real value) { accreal sum = 0; if(value == 0) { TH_TENSOR_APPLY(real, tensor, sum += *tensor_data != 0.0;); return sum; } else if(value == 1) { TH_TENSOR_APPLY(real, tensor, sum += TH_MATH_NAME(fabs)(*tensor_data);); return sum; } else if(value == 2) { TH_TENSOR_APPLY(real, tensor, accreal z = *tensor_data; sum += z*z;); return sqrt(sum); } else { TH_TENSOR_APPLY(real, tensor, sum += TH_MATH_NAME(pow)(TH_MATH_NAME(fabs)(*tensor_data), value);); return TH_MATH_NAME(pow)(sum, 1.0/value); } } void THTensor_(renorm)(THTensor *res, THTensor *src, real value, int dimension, real maxnorm) { int i; THTensor *rowR, *rowS; THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(src), 3, "invalid dimension %d", dimension + TH_INDEX_BASE); THArgCheck(value > 0, 2, "non-positive-norm not supported"); THArgCheck(THTensor_(nDimension)(src) > 1, 1, "need at least 2 dimensions, got %d dimensions", THTensor_(nDimension)(src)); rowR = THTensor_(new)(); rowS = THTensor_(new)(); THTensor_(resizeAs)(res, src); for (i=0; 
isize[dimension]; i++) { real norm = 0; real new_norm; THTensor_(select)(rowS, src, dimension, i); THTensor_(select)(rowR, res, dimension, i); if (value == 1) { TH_TENSOR_APPLY(real, rowS, norm += fabs(*rowS_data);); } else if (value == 2) { TH_TENSOR_APPLY(real, rowS, accreal z = *rowS_data; norm += z*z;); } else { TH_TENSOR_APPLY(real, rowS, norm += TH_MATH_NAME(pow)(TH_MATH_NAME(fabs)(*rowS_data), value);); } norm = pow(norm, 1/value); if (norm > maxnorm) { new_norm = maxnorm / (norm + 1e-7); TH_TENSOR_APPLY2( real, rowR, real, rowS, *rowR_data = (*rowS_data) * new_norm; ) } else THTensor_(copy)(rowR, rowS); } THTensor_(free)(rowR); THTensor_(free)(rowS); } accreal THTensor_(dist)(THTensor *tensor, THTensor *src, real value) { real sum = 0; TH_TENSOR_APPLY2(real, tensor, real, src, sum += TH_MATH_NAME(pow)( TH_MATH_NAME(fabs)(*tensor_data - *src_data), value);); return TH_MATH_NAME(pow)(sum, 1.0/value); } accreal THTensor_(meanall)(THTensor *tensor) { THArgCheck(tensor->nDimension > 0, 1, "empty Tensor"); return THTensor_(sumall)(tensor)/THTensor_(nElement)(tensor); } accreal THTensor_(varall)(THTensor *tensor, int biased) { accreal mean = THTensor_(meanall)(tensor); accreal sum = 0; TH_TENSOR_APPLY(real, tensor, sum += (*tensor_data - mean)*(*tensor_data - mean);); sum /= THTensor_(nElement)(tensor) - (biased ? 
0 : 1); return sum; } accreal THTensor_(stdall)(THTensor *tensor, int biased) { return sqrt(THTensor_(varall)(tensor, biased)); } void THTensor_(linspace)(THTensor *r_, real a, real b, long n) { real i = 0; THArgCheck(n > 1 || (n == 1 && (a == b)), 3, "invalid number of points"); if (THTensor_(nElement)(r_) != n) { THTensor_(resize1d)(r_, n); } if(n == 1) { THTensor_(set1d)(r_, 0, a); } else { TH_TENSOR_APPLY(real, r_, *r__data = a + i*(b-a)/((real)(n-1)); i++; ); } } void THTensor_(logspace)(THTensor *r_, real a, real b, long n) { real i = 0; THArgCheck(n > 1 || (n == 1 && (a == b)), 3, "invalid number of points"); if (THTensor_(nElement)(r_) != n) { THTensor_(resize1d)(r_, n); } if(n == 1) { THTensor_(set1d)(r_, 0, TH_MATH_NAME(pow)(10.0, a)); } else { TH_TENSOR_APPLY(real, r_, *r__data = TH_MATH_NAME(pow)(10.0, a + i*(b-a)/((real)(n-1))); i++; ); } } void THTensor_(rand)(THTensor *r_, THGenerator *_generator, THLongStorage *size) { THTensor_(resize)(r_, size, NULL); THTensor_(uniform)(r_, _generator, 0, 1); } void THTensor_(randn)(THTensor *r_, THGenerator *_generator, THLongStorage *size) { THTensor_(resize)(r_, size, NULL); THTensor_(normal)(r_, _generator, 0, 1); } void THTensor_(histc)(THTensor *hist, THTensor *tensor, long nbins, real minvalue, real maxvalue) { real minval; real maxval; real *h_data; THTensor_(resize1d)(hist, nbins); THTensor_(zero)(hist); minval = minvalue; maxval = maxvalue; if (minval == maxval) { minval = THTensor_(minall)(tensor); maxval = THTensor_(maxall)(tensor); } if (minval == maxval) { minval = minval - 1; maxval = maxval + 1; } h_data = THTensor_(data)(hist); TH_TENSOR_APPLY(real, tensor, if (*tensor_data >= minval && *tensor_data <= maxval) { const int bin = (int)((*tensor_data-minval) / (maxval-minval) * nbins); h_data[THMin(bin, nbins-1)] += 1; } ); } void THTensor_(bhistc)(THTensor *hist, THTensor *tensor, long nbins, real minvalue, real maxvalue) { THArgCheck(THTensor_(nDimension)(tensor) < 3, 2, "invalid dimension %d, the 
input must be a 2d tensor", THTensor_(nDimension)(tensor)); int dimension = 1; THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(tensor), 2, "invalid dimension %d", dimension + TH_INDEX_BASE); real minval; real maxval; real *h_data; THTensor_(resize2d)(hist, tensor->size[0], nbins); THTensor_(zero)(hist); minval = minvalue; maxval = maxvalue; if (minval == maxval) { minval = THTensor_(minall)(tensor); maxval = THTensor_(maxall)(tensor); } if (minval == maxval) { minval = minval - 1; maxval = maxval + 1; } TH_TENSOR_DIM_APPLY2(real, tensor, real, hist, dimension, long i; for(i = 0; i < tensor_size; i++) { if(tensor_data[i*tensor_stride] >= minval && tensor_data[i*tensor_stride] <= maxval) { const int bin = (int)((tensor_data[i*tensor_stride]-minval) / (maxval-minval) * nbins); hist_data[THMin(bin, nbins-1)] += 1; } } ); } #undef TH_MATH_NAME #endif /* floating point only part */ #undef IS_NONZERO #endif lib/TH/generic/THTensorMath.h000066400000000000000000000274541316246254300162660ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THTensorMath.h" #else TH_API void THTensor_(fill)(THTensor *r_, real value); TH_API void THTensor_(zero)(THTensor *r_); TH_API void THTensor_(maskedFill)(THTensor *tensor, THByteTensor *mask, real value); TH_API void THTensor_(maskedCopy)(THTensor *tensor, THByteTensor *mask, THTensor* src); TH_API void THTensor_(maskedSelect)(THTensor *tensor, THTensor* src, THByteTensor *mask); TH_API void THTensor_(nonzero)(THLongTensor *subscript, THTensor *tensor); TH_API void THTensor_(indexSelect)(THTensor *tensor, THTensor *src, int dim, THLongTensor *index); TH_API void THTensor_(indexCopy)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src); TH_API void THTensor_(indexAdd)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src); TH_API void THTensor_(indexFill)(THTensor *tensor, int dim, THLongTensor *index, real val); TH_API void THTensor_(gather)(THTensor *tensor, THTensor *src, 
int dim, THLongTensor *index); TH_API void THTensor_(scatter)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src); TH_API void THTensor_(scatterAdd)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src); TH_API void THTensor_(scatterFill)(THTensor *tensor, int dim, THLongTensor *index, real val); TH_API accreal THTensor_(dot)(THTensor *t, THTensor *src); TH_API real THTensor_(minall)(THTensor *t); TH_API real THTensor_(maxall)(THTensor *t); TH_API real THTensor_(medianall)(THTensor *t); TH_API accreal THTensor_(sumall)(THTensor *t); TH_API accreal THTensor_(prodall)(THTensor *t); TH_API void THTensor_(neg)(THTensor *self, THTensor *src); TH_API void THTensor_(cinv)(THTensor *self, THTensor *src); TH_API void THTensor_(add)(THTensor *r_, THTensor *t, real value); TH_API void THTensor_(sub)(THTensor *self, THTensor *src, real value); TH_API void THTensor_(mul)(THTensor *r_, THTensor *t, real value); TH_API void THTensor_(div)(THTensor *r_, THTensor *t, real value); TH_API void THTensor_(lshift)(THTensor *r_, THTensor *t, real value); TH_API void THTensor_(rshift)(THTensor *r_, THTensor *t, real value); TH_API void THTensor_(fmod)(THTensor *r_, THTensor *t, real value); TH_API void THTensor_(remainder)(THTensor *r_, THTensor *t, real value); TH_API void THTensor_(clamp)(THTensor *r_, THTensor *t, real min_value, real max_value); TH_API void THTensor_(bitand)(THTensor *r_, THTensor *t, real value); TH_API void THTensor_(bitor)(THTensor *r_, THTensor *t, real value); TH_API void THTensor_(bitxor)(THTensor *r_, THTensor *t, real value); TH_API void THTensor_(cadd)(THTensor *r_, THTensor *t, real value, THTensor *src); TH_API void THTensor_(csub)(THTensor *self, THTensor *src1, real value, THTensor *src2); TH_API void THTensor_(cmul)(THTensor *r_, THTensor *t, THTensor *src); TH_API void THTensor_(cpow)(THTensor *r_, THTensor *t, THTensor *src); TH_API void THTensor_(cdiv)(THTensor *r_, THTensor *t, THTensor *src); TH_API void 
THTensor_(clshift)(THTensor *r_, THTensor *t, THTensor *src); TH_API void THTensor_(crshift)(THTensor *r_, THTensor *t, THTensor *src); TH_API void THTensor_(cfmod)(THTensor *r_, THTensor *t, THTensor *src); TH_API void THTensor_(cremainder)(THTensor *r_, THTensor *t, THTensor *src); TH_API void THTensor_(cbitand)(THTensor *r_, THTensor *t, THTensor *src); TH_API void THTensor_(cbitor)(THTensor *r_, THTensor *t, THTensor *src); TH_API void THTensor_(cbitxor)(THTensor *r_, THTensor *t, THTensor *src); TH_API void THTensor_(addcmul)(THTensor *r_, THTensor *t, real value, THTensor *src1, THTensor *src2); TH_API void THTensor_(addcdiv)(THTensor *r_, THTensor *t, real value, THTensor *src1, THTensor *src2); TH_API void THTensor_(addmv)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *mat, THTensor *vec); TH_API void THTensor_(addmm)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *mat1, THTensor *mat2); TH_API void THTensor_(addr)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *vec1, THTensor *vec2); TH_API void THTensor_(addbmm)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *batch1, THTensor *batch2); TH_API void THTensor_(baddbmm)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *batch1, THTensor *batch2); TH_API void THTensor_(match)(THTensor *r_, THTensor *m1, THTensor *m2, real gain); TH_API ptrdiff_t THTensor_(numel)(THTensor *t); TH_API void THTensor_(max)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim); TH_API void THTensor_(min)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim); TH_API void THTensor_(kthvalue)(THTensor *values_, THLongTensor *indices_, THTensor *t, long k, int dimension, int keepdim); TH_API void THTensor_(mode)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim); TH_API void THTensor_(median)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim); TH_API void 
THTensor_(sum)(THTensor *r_, THTensor *t, int dimension, int keepdim); TH_API void THTensor_(prod)(THTensor *r_, THTensor *t, int dimension, int keepdim); TH_API void THTensor_(cumsum)(THTensor *r_, THTensor *t, int dimension); TH_API void THTensor_(cumprod)(THTensor *r_, THTensor *t, int dimension); TH_API void THTensor_(sign)(THTensor *r_, THTensor *t); TH_API accreal THTensor_(trace)(THTensor *t); TH_API void THTensor_(cross)(THTensor *r_, THTensor *a, THTensor *b, int dimension); TH_API void THTensor_(cmax)(THTensor *r, THTensor *t, THTensor *src); TH_API void THTensor_(cmin)(THTensor *r, THTensor *t, THTensor *src); TH_API void THTensor_(cmaxValue)(THTensor *r, THTensor *t, real value); TH_API void THTensor_(cminValue)(THTensor *r, THTensor *t, real value); TH_API void THTensor_(zeros)(THTensor *r_, THLongStorage *size); TH_API void THTensor_(zerosLike)(THTensor *r_, THTensor *input); TH_API void THTensor_(ones)(THTensor *r_, THLongStorage *size); TH_API void THTensor_(onesLike)(THTensor *r_, THTensor *input); TH_API void THTensor_(diag)(THTensor *r_, THTensor *t, int k); TH_API void THTensor_(eye)(THTensor *r_, long n, long m); TH_API void THTensor_(arange)(THTensor *r_, accreal xmin, accreal xmax, accreal step); TH_API void THTensor_(range)(THTensor *r_, accreal xmin, accreal xmax, accreal step); TH_API void THTensor_(randperm)(THTensor *r_, THGenerator *_generator, long n); TH_API void THTensor_(reshape)(THTensor *r_, THTensor *t, THLongStorage *size); TH_API void THTensor_(sort)(THTensor *rt_, THLongTensor *ri_, THTensor *t, int dimension, int descendingOrder); TH_API void THTensor_(topk)(THTensor *rt_, THLongTensor *ri_, THTensor *t, long k, int dim, int dir, int sorted); TH_API void THTensor_(tril)(THTensor *r_, THTensor *t, long k); TH_API void THTensor_(triu)(THTensor *r_, THTensor *t, long k); TH_API void THTensor_(cat)(THTensor *r_, THTensor *ta, THTensor *tb, int dimension); TH_API void THTensor_(catArray)(THTensor *result, THTensor **inputs, int 
numInputs, int dimension); TH_API int THTensor_(equal)(THTensor *ta, THTensor *tb); TH_API void THTensor_(ltValue)(THByteTensor *r_, THTensor* t, real value); TH_API void THTensor_(leValue)(THByteTensor *r_, THTensor* t, real value); TH_API void THTensor_(gtValue)(THByteTensor *r_, THTensor* t, real value); TH_API void THTensor_(geValue)(THByteTensor *r_, THTensor* t, real value); TH_API void THTensor_(neValue)(THByteTensor *r_, THTensor* t, real value); TH_API void THTensor_(eqValue)(THByteTensor *r_, THTensor* t, real value); TH_API void THTensor_(ltValueT)(THTensor *r_, THTensor* t, real value); TH_API void THTensor_(leValueT)(THTensor *r_, THTensor* t, real value); TH_API void THTensor_(gtValueT)(THTensor *r_, THTensor* t, real value); TH_API void THTensor_(geValueT)(THTensor *r_, THTensor* t, real value); TH_API void THTensor_(neValueT)(THTensor *r_, THTensor* t, real value); TH_API void THTensor_(eqValueT)(THTensor *r_, THTensor* t, real value); TH_API void THTensor_(ltTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb); TH_API void THTensor_(leTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb); TH_API void THTensor_(gtTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb); TH_API void THTensor_(geTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb); TH_API void THTensor_(neTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb); TH_API void THTensor_(eqTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb); TH_API void THTensor_(ltTensorT)(THTensor *r_, THTensor *ta, THTensor *tb); TH_API void THTensor_(leTensorT)(THTensor *r_, THTensor *ta, THTensor *tb); TH_API void THTensor_(gtTensorT)(THTensor *r_, THTensor *ta, THTensor *tb); TH_API void THTensor_(geTensorT)(THTensor *r_, THTensor *ta, THTensor *tb); TH_API void THTensor_(neTensorT)(THTensor *r_, THTensor *ta, THTensor *tb); TH_API void THTensor_(eqTensorT)(THTensor *r_, THTensor *ta, THTensor *tb); #if defined(TH_REAL_IS_SHORT) || defined(TH_REAL_IS_INT) || defined(TH_REAL_IS_LONG) TH_API void 
THTensor_(abs)(THTensor *r_, THTensor *t); #endif #if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) TH_API void THTensor_(sigmoid)(THTensor *r_, THTensor *t); TH_API void THTensor_(log)(THTensor *r_, THTensor *t); TH_API void THTensor_(lgamma)(THTensor *r_, THTensor *t); TH_API void THTensor_(log1p)(THTensor *r_, THTensor *t); TH_API void THTensor_(exp)(THTensor *r_, THTensor *t); TH_API void THTensor_(cos)(THTensor *r_, THTensor *t); TH_API void THTensor_(acos)(THTensor *r_, THTensor *t); TH_API void THTensor_(cosh)(THTensor *r_, THTensor *t); TH_API void THTensor_(sin)(THTensor *r_, THTensor *t); TH_API void THTensor_(asin)(THTensor *r_, THTensor *t); TH_API void THTensor_(sinh)(THTensor *r_, THTensor *t); TH_API void THTensor_(tan)(THTensor *r_, THTensor *t); TH_API void THTensor_(atan)(THTensor *r_, THTensor *t); TH_API void THTensor_(atan2)(THTensor *r_, THTensor *tx, THTensor *ty); TH_API void THTensor_(tanh)(THTensor *r_, THTensor *t); TH_API void THTensor_(pow)(THTensor *r_, THTensor *t, real value); TH_API void THTensor_(tpow)(THTensor *r_, real value, THTensor *t); TH_API void THTensor_(sqrt)(THTensor *r_, THTensor *t); TH_API void THTensor_(rsqrt)(THTensor *r_, THTensor *t); TH_API void THTensor_(ceil)(THTensor *r_, THTensor *t); TH_API void THTensor_(floor)(THTensor *r_, THTensor *t); TH_API void THTensor_(round)(THTensor *r_, THTensor *t); TH_API void THTensor_(abs)(THTensor *r_, THTensor *t); TH_API void THTensor_(trunc)(THTensor *r_, THTensor *t); TH_API void THTensor_(frac)(THTensor *r_, THTensor *t); TH_API void THTensor_(lerp)(THTensor *r_, THTensor *a, THTensor *b, real weight); TH_API void THTensor_(mean)(THTensor *r_, THTensor *t, int dimension, int keepdim); TH_API void THTensor_(std)(THTensor *r_, THTensor *t, int dimension, int biased, int keepdim); TH_API void THTensor_(var)(THTensor *r_, THTensor *t, int dimension, int biased, int keepdim); TH_API void THTensor_(norm)(THTensor *r_, THTensor *t, real value, int dimension, int 
keepdim); TH_API void THTensor_(renorm)(THTensor *r_, THTensor *t, real value, int dimension, real maxnorm); TH_API accreal THTensor_(dist)(THTensor *a, THTensor *b, real value); TH_API void THTensor_(histc)(THTensor *hist, THTensor *tensor, long nbins, real minvalue, real maxvalue); TH_API void THTensor_(bhistc)(THTensor *hist, THTensor *tensor, long nbins, real minvalue, real maxvalue); TH_API accreal THTensor_(meanall)(THTensor *self); TH_API accreal THTensor_(varall)(THTensor *self, int biased); TH_API accreal THTensor_(stdall)(THTensor *self, int biased); TH_API accreal THTensor_(normall)(THTensor *t, real value); TH_API void THTensor_(linspace)(THTensor *r_, real a, real b, long n); TH_API void THTensor_(logspace)(THTensor *r_, real a, real b, long n); TH_API void THTensor_(rand)(THTensor *r_, THGenerator *_generator, THLongStorage *size); TH_API void THTensor_(randn)(THTensor *r_, THGenerator *_generator, THLongStorage *size); #endif #if defined(TH_REAL_IS_BYTE) TH_API int THTensor_(logicalall)(THTensor *self); TH_API int THTensor_(logicalany)(THTensor *self); #endif /* TH_REAL_IS_BYTE */ #endif lib/TH/generic/THTensorRandom.c000066400000000000000000000313371316246254300166030ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THTensorRandom.c" #else void THTensor_(random)(THTensor *self, THGenerator *_generator) { #if defined(TH_REAL_IS_BYTE) TH_TENSOR_APPLY(real, self, *self_data = (unsigned char)(THRandom_random(_generator) % (UCHAR_MAX+1));); #elif defined(TH_REAL_IS_CHAR) TH_TENSOR_APPLY(real, self, *self_data = (char)(THRandom_random(_generator) % (CHAR_MAX+1));); #elif defined(TH_REAL_IS_SHORT) TH_TENSOR_APPLY(real, self, *self_data = (short)(THRandom_random(_generator) % (SHRT_MAX+1));); #elif defined(TH_REAL_IS_INT) TH_TENSOR_APPLY(real, self, *self_data = (int)(THRandom_random(_generator) % (INT_MAX+1UL));); #elif defined(TH_REAL_IS_LONG) TH_TENSOR_APPLY(real, self, *self_data = (long)(THRandom_random(_generator) % 
(LONG_MAX+1UL));); #elif defined(TH_REAL_IS_FLOAT) TH_TENSOR_APPLY(real, self, *self_data = (float)(THRandom_random(_generator) % ((1UL << FLT_MANT_DIG)+1));); #elif defined(TH_REAL_IS_DOUBLE) TH_TENSOR_APPLY(real, self, *self_data = (double)(THRandom_random(_generator) % ((1ULL << DBL_MANT_DIG)+1));); #else #error "Unknown type" #endif } void THTensor_(clampedRandom)(THTensor *self, THGenerator *_generator, long min, long max) { THArgCheck(max > min, 2, "max must be greater than min"); TH_TENSOR_APPLY(real, self, *self_data = (real)((THRandom_random(_generator) % (max - min)) + min);) } void THTensor_(cappedRandom)(THTensor *self, THGenerator *_generator, long max) { THArgCheck(max > 0, 1, "max must be positive"); THTensor_(clampedRandom)(self, _generator, 0, max); } void THTensor_(geometric)(THTensor *self, THGenerator *_generator, double p) { TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_geometric(_generator, p);); } void THTensor_(bernoulli)(THTensor *self, THGenerator *_generator, double p) { TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_bernoulli(_generator, p);); } void THTensor_(bernoulli_FloatTensor)(THTensor *self, THGenerator *_generator, THFloatTensor *p) { TH_TENSOR_APPLY2(real, self, float, p, *self_data = (real)THRandom_bernoulli(_generator, (double)*p_data);); } void THTensor_(bernoulli_DoubleTensor)(THTensor *self, THGenerator *_generator, THDoubleTensor *p) { TH_TENSOR_APPLY2(real, self, double, p, *self_data = (real)THRandom_bernoulli(_generator, (double)*p_data);); } #if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) void THTensor_(uniform)(THTensor *self, THGenerator *_generator, double a, double b) { TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_uniform(_generator, a, b);); } void THTensor_(normal)(THTensor *self, THGenerator *_generator, double mean, double stdv) { TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_normal(_generator, mean, stdv);); } void THTensor_(normal_means)(THTensor *self, 
THGenerator *gen, THTensor *means, double stddev) { THTensor_(resizeAs)(self, means); THTensor_(normal)(self, gen, 0, stddev); THTensor_(cadd)(self, self, 1, means); } void THTensor_(normal_stddevs)(THTensor *self, THGenerator *gen, double mean, THTensor *stddevs) { THTensor_(resizeAs)(self, stddevs); THTensor_(normal)(self, gen, 0, 1); THTensor_(cmul)(self, self, stddevs); THTensor_(add)(self, self, mean); } void THTensor_(normal_means_stddevs)(THTensor *self, THGenerator *gen, THTensor *means, THTensor *stddevs) { THTensor_(resizeAs)(self, means); THTensor_(normal)(self, gen, 0, 1); THTensor_(cmul)(self, self, stddevs); THTensor_(cadd)(self, self, 1, means); } void THTensor_(exponential)(THTensor *self, THGenerator *_generator, double lambda) { TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_exponential(_generator, lambda);); } void THTensor_(cauchy)(THTensor *self, THGenerator *_generator, double median, double sigma) { TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_cauchy(_generator, median, sigma);); } void THTensor_(logNormal)(THTensor *self, THGenerator *_generator, double mean, double stdv) { TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_logNormal(_generator, mean, stdv);); } void THTensor_(multinomialAliasSetup)(THTensor *probs, THLongTensor *J, THTensor *q) { long inputsize = THTensor_(nElement)(probs); long i = 0; THLongTensor *smaller = THLongTensor_newWithSize1d(inputsize); THLongTensor *larger = THLongTensor_newWithSize1d(inputsize); long small_c = 0; long large_c = 0; THLongTensor_resize1d(J, inputsize); THTensor_(resize1d)(q, inputsize); real *q_data = THTensor_(data)(q); long *J_data = THLongTensor_data(J); for(i = 0; i < inputsize; i++) { THTensor_fastSet1d(J, i, 0L); real val = THTensor_fastGet1d(probs, i); THTensor_fastSet1d(q, i, inputsize*val); if (inputsize * val < 1.0) { THTensor_fastSet1d(smaller, small_c, i); small_c += 1; } else { THTensor_fastSet1d(larger, large_c, i); large_c += 1; } } // Loop through and 
create little binary mixtures that // appropriately allocate the larger outcomes over the // overall uniform mixture. long large, small; while(small_c > 0 && large_c > 0) { large = THTensor_fastGet1d(larger, large_c-1); small = THTensor_fastGet1d(smaller, small_c-1); THTensor_fastSet1d(J, small, large); q_data[large * q->stride[0]] -= 1.0 - THTensor_fastGet1d(q, small); if(q_data[large * q->stride[0]] < 1.0) { THTensor_fastSet1d(smaller, small_c-1, large); large_c -= 1; } else { THTensor_fastSet1d(larger, large_c-1, large); small_c -= 1; } } real q_min = THTensor_fastGet1d(q, inputsize-1); real q_max = q_min; real q_temp; for(i=0; i < inputsize; i++) { q_temp = THTensor_fastGet1d(q, i); if(q_temp < q_min) q_min = q_temp; else if(q_temp > q_max) q_max = q_temp; } THArgCheckWithCleanup((q_min > 0), THCleanup(THLongTensor_free(smaller); THLongTensor_free(larger);), 2, "q_min is less than 0"); if(q_max > 1) { for(i=0; i < inputsize; i++) { q_data[i*q->stride[0]] /= q_max; } } for(i=0; i 0, 2, "cannot sample n_sample < 0 samples"); if (!with_replacement) { THArgCheck((!with_replacement) && (n_sample <= n_categories), 2, \ "cannot sample n_sample > prob_dist:size(1) samples without replacement"); } /* cumulative probability distribution vector */ cum_dist = THDoubleTensor_newWithSize1d(n_categories); /* will contain multinomial samples (category indices to be returned) */ THLongTensor_resize2d(self, n_dist , n_sample); for (i=0; istorage, \ prob_dist->storageOffset+i*prob_dist->stride[0]+j*prob_dist->stride[1] \ ); THDoubleStorage_set( cum_dist->storage, \ cum_dist->storageOffset+j*cum_dist->stride[0], \ sum \ ); } THArgCheckWithCleanup((sum > 0), THCleanup(THDoubleTensor_free(cum_dist);), 2, "invalid multinomial distribution (sum of probabilities <= 0)"); /* normalize cumulative probability distribution so that last val is 1 i.e. 
doesn't assume original prob_dist row sums to one */ if ( (sum > 0) || ( ( sum < 1.00001) && (sum > 0.99999) ) ) { for (j=0; jstride[0]] /= sum; } } for (j=0; jstride[0]] = 1; while(right_pointer - left_pointer > 0) { mid_pointer = left_pointer + (right_pointer - left_pointer) / 2; cum_prob = THDoubleStorage_get( \ cum_dist->storage, \ cum_dist->storageOffset+mid_pointer*cum_dist->stride[0] \ ); if (cum_prob < uniform_sample) { left_pointer = mid_pointer + 1; } else { right_pointer = mid_pointer; } } sample_idx = left_pointer; /* store in result tensor (will be incremented for lua compat by wrapper) */ THLongStorage_set( \ self->storage, \ self->storageOffset+i*self->stride[0]+j*self->stride[1], \ sample_idx \ ); /* Once a sample is drawn, it cannot be drawn again. ie sample without replacement */ if (!with_replacement) { /* update cumulative distribution so that sample cannot be drawn again */ double diff; double new_val = 0; double sum; if (sample_idx != 0) { new_val = THDoubleStorage_get( \ cum_dist->storage, \ cum_dist->storageOffset+(sample_idx-1)*cum_dist->stride[0] \ ); } /* marginal cumulative mass (i.e. original probability) of sample */ diff = THDoubleStorage_get( \ cum_dist->storage, \ cum_dist->storageOffset+sample_idx*cum_dist->stride[0] \ ) - new_val; /* new sum of marginals is not one anymore... 
*/ sum = 1.0 - diff; for (k=0; kstorage, \ cum_dist->storageOffset+k*cum_dist->stride[0] \ ); if (k >= sample_idx) { /* remove sampled probability mass from later cumulative probabilities */ new_val -= diff; } /* make total marginals sum to one */ new_val /= sum; THDoubleStorage_set( \ cum_dist->storage, \ cum_dist->storageOffset+k*cum_dist->stride[0], \ new_val \ ); } } } } THDoubleTensor_free(cum_dist); if (start_dim == 1) { THLongTensor_resize1d(self, n_sample); THTensor_(resize1d)(prob_dist, n_categories); } } #endif #if defined(TH_REAL_IS_BYTE) void THTensor_(getRNGState)(THGenerator *_generator, THTensor *self) { static const size_t size = sizeof(THGenerator); THGenerator *rng_state; THTensor_(resize1d)(self, size); THArgCheck(THTensor_(nElement)(self) == size, 1, "RNG state is wrong size"); THArgCheck(THTensor_(isContiguous)(self), 1, "RNG state needs to be contiguous"); rng_state = (THGenerator *)THTensor_(data)(self); THGenerator_copy(rng_state, _generator); } void THTensor_(setRNGState)(THGenerator *_generator, THTensor *self) { static const size_t size = sizeof(THGenerator); THGenerator *rng_state; THArgCheck(THTensor_(nElement)(self) == size, 1, "RNG state is wrong size"); THArgCheck(THTensor_(isContiguous)(self), 1, "RNG state needs to be contiguous"); rng_state = (THGenerator *)THTensor_(data)(self); THArgCheck(THGenerator_isValid(rng_state), 1, "Invalid RNG state"); THGenerator_copy(_generator, rng_state); } #endif #endif lib/TH/generic/THTensorRandom.h000066400000000000000000000041511316246254300166020ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THTensorRandom.h" #else TH_API void THTensor_(random)(THTensor *self, THGenerator *_generator); TH_API void THTensor_(clampedRandom)(THTensor *self, THGenerator *_generator, long min, long max); TH_API void THTensor_(cappedRandom)(THTensor *self, THGenerator *_generator, long max); TH_API void THTensor_(geometric)(THTensor *self, THGenerator *_generator, double p); 
TH_API void THTensor_(bernoulli)(THTensor *self, THGenerator *_generator, double p); TH_API void THTensor_(bernoulli_FloatTensor)(THTensor *self, THGenerator *_generator, THFloatTensor *p); TH_API void THTensor_(bernoulli_DoubleTensor)(THTensor *self, THGenerator *_generator, THDoubleTensor *p); #if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) TH_API void THTensor_(uniform)(THTensor *self, THGenerator *_generator, double a, double b); TH_API void THTensor_(normal)(THTensor *self, THGenerator *_generator, double mean, double stdv); TH_API void THTensor_(normal_means)(THTensor *self, THGenerator *gen, THTensor *means, double stddev); TH_API void THTensor_(normal_stddevs)(THTensor *self, THGenerator *gen, double mean, THTensor *stddevs); TH_API void THTensor_(normal_means_stddevs)(THTensor *self, THGenerator *gen, THTensor *means, THTensor *stddevs); TH_API void THTensor_(exponential)(THTensor *self, THGenerator *_generator, double lambda); TH_API void THTensor_(cauchy)(THTensor *self, THGenerator *_generator, double median, double sigma); TH_API void THTensor_(logNormal)(THTensor *self, THGenerator *_generator, double mean, double stdv); TH_API void THTensor_(multinomial)(THLongTensor *self, THGenerator *_generator, THTensor *prob_dist, int n_sample, int with_replacement); TH_API void THTensor_(multinomialAliasSetup)(THTensor *prob_dist, THLongTensor *J, THTensor *q); TH_API void THTensor_(multinomialAliasDraw)(THLongTensor *self, THGenerator *_generator, THLongTensor *J, THTensor *q); #endif #if defined(TH_REAL_IS_BYTE) TH_API void THTensor_(getRNGState)(THGenerator *_generator, THTensor *self); TH_API void THTensor_(setRNGState)(THGenerator *_generator, THTensor *self); #endif #endif lib/TH/generic/THVector.h000066400000000000000000000015211316246254300154270ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THVector.h" #else TH_API void THVector_(fill)(real *x, const real c, const ptrdiff_t n); TH_API void 
THVector_(cadd)(real *z, const real *x, const real *y, const real c, const ptrdiff_t n); TH_API void THVector_(adds)(real *y, const real *x, const real c, const ptrdiff_t n); TH_API void THVector_(cmul)(real *z, const real *x, const real *y, const ptrdiff_t n); TH_API void THVector_(muls)(real *y, const real *x, const real c, const ptrdiff_t n); TH_API void THVector_(cdiv)(real *z, const real *x, const real *y, const ptrdiff_t n); TH_API void THVector_(divs)(real *y, const real *x, const real c, const ptrdiff_t n); TH_API void THVector_(copy)(real *y, const real *x, const ptrdiff_t n); /* Initialize the dispatch pointers */ TH_API void THVector_(vectorDispatchInit)(void); #endif lib/TH/generic/THVectorDefault.c000066400000000000000000000044241316246254300167340ustar00rootroot00000000000000#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THVectorDefault.c" #else void THVector_(copy_DEFAULT)(real *x, const real *y, const ptrdiff_t n) { ptrdiff_t i = 0; for(; i static __inline int __get_cpuid (unsigned int __level, unsigned int *__eax, unsigned int *__ebx, unsigned int *__ecx, unsigned int *__edx) { unsigned int cpui[4]; __cpuid(cpui, __level); *__eax = cpui[0]; *__ebx = cpui[1]; *__ecx = cpui[2]; *__edx = cpui[3]; return 1; } static void xgetbv(unsigned int op, unsigned int* eax, unsigned int* edx) { *eax = 0; *edx = 0; if (op == 0) *eax = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); } #else #if __i386__ #define __cpuid(__level, __eax, __ebx, __ecx, __edx) \ __asm(" pushl %%ebx\n" \ " cpuid\n" \ " mov %%ebx,%1\n" \ " popl %%ebx" \ : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \ : "0"(__level)) #else #define __cpuid(__level, __eax, __ebx, __ecx, __edx) \ __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \ : "0"(__level)) #endif static __inline int __get_cpuid (unsigned int __level, unsigned int *__eax, unsigned int *__ebx, unsigned int *__ecx, unsigned int *__edx) { __cpuid(__level, *__eax, *__ebx, *__ecx, *__edx); return 1; } static void 
xgetbv(unsigned int op, unsigned int* eax, unsigned int* edx) { __asm__ __volatile__ (".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc"); } #endif enum ECPUFeature { kCPUFeature_SSE = 0x01, kCPUFeature_SSE2 = 0x02, kCPUFeature_SSE3 = 0x04, kCPUFeature_SSE3_S = 0x08, kCPUFeature_SSE4_1 = 0x10, kCPUFeature_SSE4_2 = 0x20, kCPUFeature_AVX = 0x40 }; static unsigned int checkCPUFeatures() { unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; unsigned int features = 0; __get_cpuid(1, &eax, &ebx, &ecx, &edx); if( (edx & (1 << 25)) != 0 ) { features |= kCPUFeature_SSE; } if( (edx & (1 << 26)) != 0 ) { features |= kCPUFeature_SSE2; } if( (ecx & (1 << 0)) != 0 ) { features |= kCPUFeature_SSE3; } if( (ecx & (1 << 9)) != 0 ) { features |= kCPUFeature_SSE3_S; } if( (ecx & (1 << 19)) != 0 ) { features |= kCPUFeature_SSE4_1; } if( (ecx & (1 << 20)) != 0 ) { features |= kCPUFeature_SSE4_2; } if( (ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0 ) { xgetbv(0, &eax, &edx); if( (eax & 6) == 6 ) { features |= kCPUFeature_AVX; } } return features; } #include static int haveCPUFeature(unsigned int feature) { static unsigned int sCPUFeatures = 0; static int sDetectedCPUFeatures = 0; if (!sDetectedCPUFeatures) { sDetectedCPUFeatures = 1; sCPUFeatures = checkCPUFeatures(); if ((sCPUFeatures & kCPUFeature_AVX) != 0) { printf("torch running avx\n"); } else { printf("torch running sse \n"); } } return (sCPUFeatures & feature) != 0; } #endif void convolve_5x5_sse(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols); void convolve_5x5_avx(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols); void convolve_5x5(float* output, float* input, float* kernel, long outRows, long outCols, long inCols) { #if defined(__AVX__) int avx = haveCPUFeature(kCPUFeature_AVX); if (avx) { convolve_5x5_avx(output, input, kernel, outRows, outCols, outCols, inCols); } else #endif { 
convolve_5x5_sse(output, input, kernel, outRows, outCols, outCols, inCols); } } lib/TH/generic/simd/convolve.h000066400000000000000000000001471316246254300165230ustar00rootroot00000000000000void convolve_5x5(float* output, float* input, float* kernel, long outRows, long outCols, long inCols);lib/TH/generic/simd/convolve5x5_avx.c000066400000000000000000000142301316246254300177340ustar00rootroot00000000000000#include #include "common_simd.h" #define CLEAR_AVX() _mm256_zeroupper() void convolve_5x5_1_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { long i = 0; long alignedCount = count & 0xFFFFFFF8; DECLARE_OUTPUT_1() for (; i < alignedCount; i+=8) { CONVOLVE_8COLS_XROWS(1, i) } } void convolve_5x5_2_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { long i = 0; long alignedCount = count & 0xFFFFFFF8; DECLARE_OUTPUT_2() for (; i < alignedCount; i+=8) { CONVOLVE_8COLS_XROWS(2, i) } } void convolve_5x5_4_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { long i = 0; long alignedCount = count & 0xFFFFFFF8; DECLARE_OUTPUT_4() for (; i < alignedCount; i+=8) { CONVOLVE_8COLS_XROWS(4, i) } } void convolve_5x5_5_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { long i = 0; long alignedCount = count & 0xFFFFFFF8; DECLARE_OUTPUT_5() for (; i < alignedCount; i+=8) { CONVOLVE_8COLS_XROWS(5, i) } } void convolve_5x5_6_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { long i = 0; long alignedCount = count & 0xFFFFFFF8; DECLARE_OUTPUT_6() for (; i < alignedCount; i+=8) { CONVOLVE_8COLS_XROWS(6, i) } } void convolve_5x5_7_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { long i = 0; long alignedCount = count & 0xFFFFFFF8; DECLARE_OUTPUT_7() for (; i < alignedCount; i+=8) { CONVOLVE_8COLS_XROWS(7, 
i) } } void convolve_5x5_8_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { long i = 0; long alignedCount = count & 0xFFFFFFF8; DECLARE_OUTPUT_8() for (; i < alignedCount; i+=8) { CONVOLVE_8COLS_XROWS(8, i) } } void convolve_5x5_64x64_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { for(int i = 0; i < 60; i+=6) { DECLARE_OUTPUT_6() CONVOLVE_8COLS_XROWS(6, 0) CONVOLVE_8COLS_XROWS(6, 8) CONVOLVE_8COLS_XROWS(6, 16) CONVOLVE_8COLS_XROWS(6, 24) CONVOLVE_8COLS_XROWS(6, 32) CONVOLVE_8COLS_XROWS(6, 40) CONVOLVE_8COLS_XROWS(6, 48) CONVOLVE_8COLS_XROWS(6, 56) output += outputStride * 6; image += inputStride * 6; } DECLARE_OUTPUT_4() CONVOLVE_8COLS_XROWS(4, 0) CONVOLVE_8COLS_XROWS(4, 8) CONVOLVE_8COLS_XROWS(4, 16) CONVOLVE_8COLS_XROWS(4, 24) CONVOLVE_8COLS_XROWS(4, 32) CONVOLVE_8COLS_XROWS(4, 40) CONVOLVE_8COLS_XROWS(4, 48) CONVOLVE_8COLS_XROWS(4, 56) } void convolve_5x5_32x32_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { for(int i = 0; i < 30; i+=6) { DECLARE_OUTPUT_6() CONVOLVE_8COLS_XROWS(6, 0) CONVOLVE_8COLS_XROWS(6, 8) CONVOLVE_8COLS_XROWS(6, 16) CONVOLVE_8COLS_XROWS(6, 24) output += outputStride * 6; image += inputStride * 6; } DECLARE_OUTPUT_2() CONVOLVE_8COLS_XROWS(2, 0) CONVOLVE_8COLS_XROWS(2, 8) CONVOLVE_8COLS_XROWS(2, 16) CONVOLVE_8COLS_XROWS(2, 24) } void convolve_5x5_16x16_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { for(int i = 0; i < 12; i+=6) { DECLARE_OUTPUT_6() CONVOLVE_8COLS_XROWS(6, 0) CONVOLVE_8COLS_XROWS(6, 8) output += outputStride * 6; image += inputStride * 6; } DECLARE_OUTPUT_4() CONVOLVE_8COLS_XROWS(4, 0) CONVOLVE_8COLS_XROWS(4, 8) } void convolve_5x5_8x8_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { DECLARE_OUTPUT_8() CONVOLVE_8COLS_XROWS(8, 0) } void convolve_5x5_sse(float* output, float* 
input, float* kernel, long outRows, long outCols, long outStride, long inCols); void convolve_5x5_avx(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols) { long ic = inCols; long yy = 0; float* t_ = input; float* r_ = output; float* k_ = kernel; if((outRows == 64) && (outCols == 64)) { convolve_5x5_64x64_avx(output, input, kernel, outRows, outStride, inCols); return; } if((outRows == 32) && (outCols == 32)) { convolve_5x5_32x32_avx(output, input, kernel, outRows, outStride, inCols); return; } if((outRows == 16) && (outCols == 16)) { convolve_5x5_16x16_avx(output, input, kernel, outRows, outStride, inCols); return; } if((outRows == 8) && (outCols == 8)) { convolve_5x5_8x8_avx(output, input, kernel, outRows, outStride, inCols); return; } for(; yy < (outRows / 6 ) * 6; yy += 6) { float *pi_ = t_ + yy*ic; float *pw_ = k_; float *pis_ = pi_; convolve_5x5_6_avx(r_, pis_, pw_, outCols, outStride, ic); r_ += (outStride * 6); } // more than 2 rows left to process and we ended up on a non-multiple of 4 if((yy < (outRows & 0xFFFFFFFE)) && ((yy % 4) != 0)) { // process 2 rows to align on the next multiple of 4 rows (because we were a multiple of 6 after the previous loop) float *pi_ = t_ + yy*ic; float *pw_ = k_; float *pis_ = pi_; convolve_5x5_2_avx(r_, pis_, pw_, outCols, outStride, ic); r_ += (outStride * 2); yy += 2; } for(; yy < (outRows & 0xFFFFFFFC); yy += 4) { float *pi_ = t_ + yy*ic; float *pw_ = k_; float *pis_ = pi_; convolve_5x5_4_avx(r_, pis_, pw_, outCols, outStride, ic); r_ += (outStride * 4); } for(; yy < (outRows & 0xFFFFFFFE); yy += 2) { float *pi_ = t_ + yy*ic; float *pw_ = k_; float *pis_ = pi_; convolve_5x5_2_avx(r_, pis_, pw_, outCols, outStride, ic); r_ += (outStride * 2); } for(; yy < outRows; yy += 1) { float *pi_ = t_ + yy*ic; float *pw_ = k_; float *pis_ = pi_; convolve_5x5_1_avx(r_, pis_, pw_, outCols, outStride, ic); r_ += (outStride * 1); } long procCols = outCols & 0xFFFFFFF8; // avx version 
processes 8 cols at a time long remCols = outCols - procCols; //process the rest using sse if( remCols > 0) { CLEAR_AVX(); convolve_5x5_sse(&output[procCols], &input[procCols], kernel, outRows, remCols, outStride, inCols); } }lib/TH/generic/simd/convolve5x5_sse.c000066400000000000000000000251531316246254300177360ustar00rootroot00000000000000#include #include "common_simd.h" /* SSE variants */ void convolve_5x5_1_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { long i = 0; long alignedCount4 = count & 0xFFFFFFFC; DECLARE_OUTPUT_1() for (; i < alignedCount4; i+=4) { CONVOLVE_4COLS_XROWS(1, i) } for (; i < (count); i++) { float output0 = output[i + outputStride * 0]; int row; for (row = 0; row < 5; row++) { int col; for (col = 0; col < 5; col++) { output0 += weight[5 * row + col] * image[i + (row + 0) * inputStride + col]; } } output[i + outputStride * 0] = output0; } } void convolve_5x5_2_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { long i = 0; long alignedCount4 = count & 0xFFFFFFFC; DECLARE_OUTPUT_2() for (; i < alignedCount4; i+=4) { CONVOLVE_4COLS_XROWS(2, i) } for (; i < (count); i++) { float output0 = output[i + outputStride * 0]; float output1 = output[i + outputStride * 1]; int row; for (row = 0; row < 5; row++) { int col; for (col = 0; col < 5; col++) { output0 += weight[5 * row + col] * image[i + (row + 0) * inputStride + col]; output1 += weight[5 * row + col] * image[i + (row + 1) * inputStride + col]; } } output[i + outputStride * 0] = output0; output[i + outputStride * 1] = output1; } } void convolve_5x5_4_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { long i = 0; long alignedCount4 = count & 0xFFFFFFFC; DECLARE_OUTPUT_4() for (; i < alignedCount4; i+=4) { CONVOLVE_4COLS_XROWS(4, i) } for (; i < (count); i++) { float output0 = output[i + outputStride * 0]; float output1 = output[i + outputStride * 1]; 
float output2 = output[i + outputStride * 2]; float output3 = output[i + outputStride * 3]; int row; for (row = 0; row < 5; row++) { int col; for (col = 0; col < 5; col++) { output0 += weight[5 * row + col] * image[i + (row + 0) * inputStride + col]; output1 += weight[5 * row + col] * image[i + (row + 1) * inputStride + col]; output2 += weight[5 * row + col] * image[i + (row + 2) * inputStride + col]; output3 += weight[5 * row + col] * image[i + (row + 3) * inputStride + col]; } } output[i + outputStride * 0] = output0; output[i + outputStride * 1] = output1; output[i + outputStride * 2] = output2; output[i + outputStride * 3] = output3; } } void convolve_5x5_6_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { long i = 0; long alignedCount4 = count & 0xFFFFFFFC; DECLARE_OUTPUT_6() for (; i < alignedCount4; i+=4) { CONVOLVE_4COLS_XROWS(6, i) } for (; i<(count); i++) { float output0 = output[i + outputStride * 0]; float output1 = output[i + outputStride * 1]; float output2 = output[i + outputStride * 2]; float output3 = output[i + outputStride * 3]; float output4 = output[i + outputStride * 4]; float output5 = output[i + outputStride * 5]; int row; for (row = 0; row < 5; row++) { int col; for (col = 0; col < 5; col++) { output0 += weight[5 * row + col] * image[i + (row + 0) * inputStride + col]; output1 += weight[5 * row + col] * image[i + (row + 1) * inputStride + col]; output2 += weight[5 * row + col] * image[i + (row + 2) * inputStride + col]; output3 += weight[5 * row + col] * image[i + (row + 3) * inputStride + col]; output4 += weight[5 * row + col] * image[i + (row + 4) * inputStride + col]; output5 += weight[5 * row + col] * image[i + (row + 5) * inputStride + col]; } } output[i + outputStride * 0] = output0; output[i + outputStride * 1] = output1; output[i + outputStride * 2] = output2; output[i + outputStride * 3] = output3; output[i + outputStride * 4] = output4; output[i + outputStride * 5] = output5; } } 
void convolve_5x5_8_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { long i = 0; long alignedCount4 = count & 0xFFFFFFFC; DECLARE_OUTPUT_8() for (; i < alignedCount4; i+=4) { CONVOLVE_4COLS_XROWS(8, i) } for (; i<(count); i++) { float output0 = output[i + outputStride * 0]; float output1 = output[i + outputStride * 1]; float output2 = output[i + outputStride * 2]; float output3 = output[i + outputStride * 3]; float output4 = output[i + outputStride * 4]; float output5 = output[i + outputStride * 5]; float output6 = output[i + outputStride * 6]; float output7 = output[i + outputStride * 7]; int row; for (row = 0; row < 5; row++) { int col; for (col = 0; col < 5; col++) { output0 += weight[5 * row + col] * image[i + (row + 0) * inputStride + col]; output1 += weight[5 * row + col] * image[i + (row + 1) * inputStride + col]; output2 += weight[5 * row + col] * image[i + (row + 2) * inputStride + col]; output3 += weight[5 * row + col] * image[i + (row + 3) * inputStride + col]; output4 += weight[5 * row + col] * image[i + (row + 4) * inputStride + col]; output5 += weight[5 * row + col] * image[i + (row + 5) * inputStride + col]; output6 += weight[5 * row + col] * image[i + (row + 6) * inputStride + col]; output7 += weight[5 * row + col] * image[i + (row + 7) * inputStride + col]; } } output[i + outputStride * 0] = output0; output[i + outputStride * 1] = output1; output[i + outputStride * 2] = output2; output[i + outputStride * 3] = output3; output[i + outputStride * 4] = output4; output[i + outputStride * 5] = output5; output[i + outputStride * 6] = output6; output[i + outputStride * 7] = output7; } } #define UNROLL_SSE_CONVOLUTION 0 #if (UNROLL_SSE_CONVOLUTION) void convolve_5x5_64x64_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { for(int i = 0; i < 60; i+=6) { DECLARE_OUTPUT_6() CONVOLVE_4COLS_XROWS(6, 0) CONVOLVE_4COLS_XROWS(6, 4) CONVOLVE_4COLS_XROWS(6, 8) 
CONVOLVE_4COLS_XROWS(6, 12) CONVOLVE_4COLS_XROWS(6, 16) CONVOLVE_4COLS_XROWS(6, 20) CONVOLVE_4COLS_XROWS(6, 24) CONVOLVE_4COLS_XROWS(6, 28) CONVOLVE_4COLS_XROWS(6, 32) CONVOLVE_4COLS_XROWS(6, 36) CONVOLVE_4COLS_XROWS(6, 40) CONVOLVE_4COLS_XROWS(6, 44) CONVOLVE_4COLS_XROWS(6, 48) CONVOLVE_4COLS_XROWS(6, 52) CONVOLVE_4COLS_XROWS(6, 56) CONVOLVE_4COLS_XROWS(6, 60) output += outputStride * 6; image += inputStride * 6; } DECLARE_OUTPUT_4() CONVOLVE_4COLS_XROWS(4, 0) CONVOLVE_4COLS_XROWS(4, 4) CONVOLVE_4COLS_XROWS(4, 8) CONVOLVE_4COLS_XROWS(4, 12) CONVOLVE_4COLS_XROWS(4, 16) CONVOLVE_4COLS_XROWS(4, 20) CONVOLVE_4COLS_XROWS(4, 24) CONVOLVE_4COLS_XROWS(4, 28) CONVOLVE_4COLS_XROWS(4, 32) CONVOLVE_4COLS_XROWS(4, 36) CONVOLVE_4COLS_XROWS(4, 40) CONVOLVE_4COLS_XROWS(4, 44) CONVOLVE_4COLS_XROWS(4, 48) CONVOLVE_4COLS_XROWS(4, 52) CONVOLVE_4COLS_XROWS(4, 56) CONVOLVE_4COLS_XROWS(4, 60) } void convolve_5x5_32x32_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { for(int i = 0; i < 30; i+=6) { DECLARE_OUTPUT_6() CONVOLVE_4COLS_XROWS(6, 0) CONVOLVE_4COLS_XROWS(6, 4) CONVOLVE_4COLS_XROWS(6, 8) CONVOLVE_4COLS_XROWS(6, 12) CONVOLVE_4COLS_XROWS(6, 16) CONVOLVE_4COLS_XROWS(6, 20) CONVOLVE_4COLS_XROWS(6, 24) CONVOLVE_4COLS_XROWS(6, 28) output += outputStride * 6; image += inputStride * 6; } DECLARE_OUTPUT_2() CONVOLVE_4COLS_XROWS(2, 0) CONVOLVE_4COLS_XROWS(2, 4) CONVOLVE_4COLS_XROWS(2, 8) CONVOLVE_4COLS_XROWS(2, 12) CONVOLVE_4COLS_XROWS(2, 16) CONVOLVE_4COLS_XROWS(2, 20) CONVOLVE_4COLS_XROWS(2, 24) CONVOLVE_4COLS_XROWS(2, 28) } void convolve_5x5_16x16_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { for(int i = 0; i < 12; i+=6) { DECLARE_OUTPUT_6() CONVOLVE_4COLS_XROWS(6, 0) CONVOLVE_4COLS_XROWS(6, 4) CONVOLVE_4COLS_XROWS(6, 8) CONVOLVE_4COLS_XROWS(6, 12) output += outputStride * 6; image += inputStride * 6; } DECLARE_OUTPUT_4() CONVOLVE_4COLS_XROWS(4, 0) CONVOLVE_4COLS_XROWS(4, 4) 
CONVOLVE_4COLS_XROWS(4, 8) CONVOLVE_4COLS_XROWS(4, 12) } void convolve_5x5_8x8_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { DECLARE_OUTPUT_8() CONVOLVE_4COLS_XROWS(8, 0) CONVOLVE_4COLS_XROWS(8, 4) } #endif void convolve_5x5_sse(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols) { long yy = 0; float* t_ = input; float* r_ = output; float* k_ = kernel; #if (UNROLL_SSE_CONVOLUTION) if((outRows == 64) && (outCols == 64)) { convolve_5x5_64x64_sse(output, input, kernel, outRows, outStride, inCols); return; } if((outRows == 32) && (outCols == 32)) { convolve_5x5_32x32_sse(output, input, kernel, outRows, outStride, inCols); return; } if((outRows == 16) && (outCols == 16)) { convolve_5x5_16x16_sse(output, input, kernel, outRows, outStride, inCols); return; } if((outRows == 8) && (outCols == 8)) { convolve_5x5_8x8_sse(output, input, kernel, outRows, outStride, inCols); return; } #endif for(; yy < (outRows / 6 ) * 6; yy += 6) { float *pi_ = t_ + yy*inCols; float *pw_ = k_; float *pis_ = pi_; convolve_5x5_6_sse(r_, pis_, pw_, outCols, outStride, inCols); r_ += (outStride * 6); } // more than 2 rows left to process and we ended up on a non-multiple of 4 if((yy < (outRows & 0xFFFFFFFE)) && ((yy % 4) != 0)) { // process 2 rows to align on the next multiple of 4 rows (because we were a multiple of 6 after the previous loop) float *pi_ = t_ + yy*inCols; float *pw_ = k_; float *pis_ = pi_; convolve_5x5_2_sse(r_, pis_, pw_, outCols, outStride, inCols); r_ += (outStride * 2); yy += 2; } for(; yy < (outRows & 0xFFFFFFFC); yy += 4) { float *pi_ = t_ + yy*inCols; float *pw_ = k_; float *pis_ = pi_; convolve_5x5_4_sse(r_, pis_, pw_, outCols, outStride, inCols); r_ += (outStride * 4); } for(; yy < (outRows & 0xFFFFFFFE); yy += 2) { float *pi_ = t_ + yy*inCols; float *pw_ = k_; float *pis_ = pi_; convolve_5x5_2_sse(r_, pis_, pw_, outCols, outStride, inCols); r_ += (outStride * 2); } for(; 
yy < outRows; yy += 1) { float *pi_ = t_ + yy*inCols; float *pw_ = k_; float *pis_ = pi_; convolve_5x5_1_sse(r_, pis_, pw_, outCols, outStride, inCols); r_ += (outStride * 1); } } lib/TH/generic/simd/simd.h000066400000000000000000000077051316246254300156330ustar00rootroot00000000000000#ifndef TH_SIMD_INC #define TH_SIMD_INC #include #include #if defined(_MSC_VER) #include #elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) #include #endif // Can be found on Intel ISA Reference for CPUID #define CPUID_AVX2_BIT 0x20 // Bit 5 of EBX for EAX=0x7 #define CPUID_AVX_BIT 0x10000000 // Bit 28 of ECX for EAX=0x1 #define CPUID_SSE_BIT 0x2000000 // bit 25 of EDX for EAX=0x1 // Helper macros for initialization #define FUNCTION_IMPL(NAME, EXT) \ { .function=(void *)NAME, \ .supportedSimdExt=EXT \ } #define INIT_DISPATCH_PTR(OP) \ do { \ int i; \ for (i = 0; i < sizeof(THVector_(OP ## _DISPATCHTABLE)) / sizeof(FunctionDescription); ++i) { \ THVector_(OP ## _DISPATCHPTR) = THVector_(OP ## _DISPATCHTABLE)[i].function; \ if (THVector_(OP ## _DISPATCHTABLE)[i].supportedSimdExt & hostSimdExts) { \ break; \ } \ } \ } while(0) typedef struct FunctionDescription { void *function; uint32_t supportedSimdExt; } FunctionDescription; enum SIMDExtensions { #if defined(__NEON__) SIMDExtension_NEON = 0x1, #elif defined(__PPC64__) SIMDExtension_VSX = 0x1, #else SIMDExtension_AVX2 = 0x1, SIMDExtension_AVX = 0x2, SIMDExtension_SSE = 0x4, #endif SIMDExtension_DEFAULT = 0x0 }; #if defined(__arm__) || defined(__aarch64__) // incl. 
armel, armhf, arm64 #if defined(__NEON__) static inline uint32_t detectHostSIMDExtensions() { return SIMDExtension_NEON; } #else //ARM without NEON static inline uint32_t detectHostSIMDExtensions() { return SIMDExtension_DEFAULT; } #endif #elif defined(__PPC64__) #if defined(__VSX__) static inline uint32_t detectHostSIMDExtensions() { uint32_t hostSimdExts = SIMDExtension_DEFAULT; char *evar; evar = getenv("TH_NO_VSX"); if (evar == NULL || strncmp(evar, "1", 2) != 0) hostSimdExts = SIMDExtension_VSX; return hostSimdExts; } #else //PPC64 without VSX static inline uint32_t detectHostSIMDExtensions() { return SIMDExtension_DEFAULT; } #endif #else // x86 static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { #if defined(_MSC_VER) uint32_t cpuInfo[4]; __cpuid(cpuInfo, *eax); *eax = cpuInfo[0]; *ebx = cpuInfo[1]; *ecx = cpuInfo[2]; *edx = cpuInfo[3]; #elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) uint32_t level = *eax; __get_cpuid (level, eax, ebx, ecx, edx); #else uint32_t a = *eax, b, c = *ecx, d; asm volatile ( "cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d) ); *eax = a; *ebx = b; *ecx = c; *edx = d; #endif } static inline uint32_t detectHostSIMDExtensions() { uint32_t eax, ebx, ecx, edx; uint32_t hostSimdExts = 0x0; int TH_NO_AVX = 1, TH_NO_AVX2 = 1, TH_NO_SSE = 1; char *evar; evar = getenv("TH_NO_AVX2"); if (evar == NULL || strncmp(evar, "1", 2) != 0) TH_NO_AVX2 = 0; // Check for AVX2. 
Requires separate CPUID eax = 0x7; ecx = 0x0; cpuid(&eax, &ebx, &ecx, &edx); if ((ebx & CPUID_AVX2_BIT) && TH_NO_AVX2 == 0) { hostSimdExts |= SIMDExtension_AVX2; } // Detect and enable AVX and SSE eax = 0x1; cpuid(&eax, &ebx, &ecx, &edx); evar = getenv("TH_NO_AVX"); if (evar == NULL || strncmp(evar, "1", 2) != 0) TH_NO_AVX = 0; if (ecx & CPUID_AVX_BIT && TH_NO_AVX == 0) { hostSimdExts |= SIMDExtension_AVX; } evar = getenv("TH_NO_SSE"); if (evar == NULL || strncmp(evar, "1", 2) != 0) TH_NO_SSE = 0; if (edx & CPUID_SSE_BIT && TH_NO_SSE == 0) { hostSimdExts |= SIMDExtension_SSE; } return hostSimdExts; } #endif // end SIMD extension detection code #endif lib/TH/vector/000077500000000000000000000000001316246254300134475ustar00rootroot00000000000000lib/TH/vector/AVX.c000066400000000000000000000165631316246254300142640ustar00rootroot00000000000000#if defined(__AVX__) #ifndef _MSC_VER #include #else #include #endif #include "AVX.h" void THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t n) { ptrdiff_t i; ptrdiff_t off; for (i=0; i<=((n)-8); i+=8) { _mm256_storeu_pd(y+i, _mm256_loadu_pd(x+i)); _mm256_storeu_pd(y+i+4, _mm256_loadu_pd(x+i+4)); } off = (n) - ((n)%8); for (i=0; i<((n)%8); i++) { y[off+i] = x[off+i]; } } void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n) { ptrdiff_t i; ptrdiff_t off; __m256d YMM0 = _mm256_set_pd(c, c, c, c); for (i=0; i<=((n)-16); i+=16) { _mm256_storeu_pd((x)+i , YMM0); _mm256_storeu_pd((x)+i+4, YMM0); _mm256_storeu_pd((x)+i+8, YMM0); _mm256_storeu_pd((x)+i+12, YMM0); } off = (n) - ((n)%16); for (i=0; i<((n)%16); i++) { x[off+i] = c; } } void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, const ptrdiff_t n) { ptrdiff_t i; __m256d YMM0, YMM1, YMM2, YMM3; for (i=0; i<=((n)-8); i+=8) { YMM0 = _mm256_loadu_pd(x+i); YMM1 = _mm256_loadu_pd(x+i+4); YMM2 = _mm256_loadu_pd(y+i); YMM3 = _mm256_loadu_pd(y+i+4); YMM2 = _mm256_div_pd(YMM0, YMM2); YMM3 = _mm256_div_pd(YMM1, YMM3); 
_mm256_storeu_pd(z+i, YMM2); _mm256_storeu_pd(z+i+4, YMM3); } for (; i<(n); i++) { z[i] = x[i] / y[i]; } } void THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n) { ptrdiff_t i; __m256d YMM15 = _mm256_set_pd(c, c, c, c); __m256d YMM0, YMM1; for (i=0; i<=((n)-8); i+=8) { YMM0 = _mm256_loadu_pd(x+i); YMM1 = _mm256_loadu_pd(x+i+4); YMM0 = _mm256_div_pd(YMM0, YMM15); YMM1 = _mm256_div_pd(YMM1, YMM15); _mm256_storeu_pd(y+i, YMM0); _mm256_storeu_pd(y+i+4, YMM1); } for (; i<(n); i++) { y[i] = x[i] / c; } } void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y, const ptrdiff_t n) { ptrdiff_t i; __m256d YMM0, YMM1, YMM2, YMM3; for (i=0; i<=((n)-8); i+=8) { YMM0 = _mm256_loadu_pd(x+i); YMM1 = _mm256_loadu_pd(x+i+4); YMM2 = _mm256_loadu_pd(y+i); YMM3 = _mm256_loadu_pd(y+i+4); YMM2 = _mm256_mul_pd(YMM0, YMM2); YMM3 = _mm256_mul_pd(YMM1, YMM3); _mm256_storeu_pd(z+i, YMM2); _mm256_storeu_pd(z+i+4, YMM3); } for (; i void THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t n); void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n); void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, const ptrdiff_t n); void THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n); void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y, const ptrdiff_t n); void THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n); void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n); void THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n); void THFloatVector_copy_AVX(float *y, const float *x, const ptrdiff_t n); void THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n); void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, const ptrdiff_t n); void THFloatVector_divs_AVX(float *y, const float *x, const 
float c, const ptrdiff_t n); void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, const ptrdiff_t n); void THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n); void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n); void THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n); #endif lib/TH/vector/AVX2.c000066400000000000000000000025011316246254300143310ustar00rootroot00000000000000#if defined(__AVX2__) #ifndef _MSC_VER #include #else #include #endif #include "AVX2.h" void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) { ptrdiff_t i; __m256d YMM15 = _mm256_set_pd(c, c, c, c); __m256d YMM0, YMM1, YMM2, YMM3; for (i=0; i<=((n)-8); i+=8) { YMM0 = _mm256_loadu_pd(y+i); YMM1 = _mm256_loadu_pd(y+i+4); YMM2 = _mm256_loadu_pd(x+i); YMM3 = _mm256_loadu_pd(x+i+4); YMM2 = _mm256_fmadd_pd(YMM0, YMM15, YMM2); YMM3 = _mm256_fmadd_pd(YMM1, YMM15, YMM3); _mm256_storeu_pd(z+i, YMM2); _mm256_storeu_pd(z+i+4, YMM3); } for (; i<(n); i++) { z[i] = x[i] + y[i] * c; } } void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) { ptrdiff_t i; __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c); __m256 YMM0, YMM1, YMM2, YMM3; for (i=0; i<=((n)-16); i+=16) { YMM0 = _mm256_loadu_ps(y+i); YMM1 = _mm256_loadu_ps(y+i+8); YMM2 = _mm256_loadu_ps(x+i); YMM3 = _mm256_loadu_ps(x+i+8); YMM2 = _mm256_fmadd_ps(YMM0, YMM15, YMM2); YMM3 = _mm256_fmadd_ps(YMM1, YMM15, YMM3); _mm256_storeu_ps(z+i, YMM2); _mm256_storeu_ps(z+i+8, YMM3); } for (; i<(n); i++) { z[i] = x[i] + y[i] * c; } } #endif // defined(__AVX2__) lib/TH/vector/AVX2.h000066400000000000000000000004331316246254300143400ustar00rootroot00000000000000#ifndef TH_AVX2_H #define TH_AVX2_H #include void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n); void 
THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n); #endif lib/TH/vector/NEON.c000066400000000000000000000040011316246254300143450ustar00rootroot00000000000000static void THFloatVector_fill_NEON(float *x, const float c, const ptrdiff_t n) { long i = 0; for(; i < n-4; i += 4) { x[i] = c; x[i+1] = c; x[i+2] = c; x[i+3] = c; } for(; i < n; i++) x[i] = c; } static void THFloatVector_cmul_NEON(float *z, const float *x, const float* y, const ptrdiff_t n) { long i = 0; for(; i < n-4; i += 4) { z[i] = x[i] * y[i]; z[i+1] = x[i+1] * y[i+1]; z[i+2] = x[i+2] * y[i+2]; z[i+3] = x[i+3] * y[i+3]; } for(; i < n; i++) z[i] = x[i] * y[i]; } static void THFloatVector_muls_NEON(float *y, const float *x, const float c, const ptrdiff_t n) { long i = 0; for(; i < n-4; i += 4) { y[i] = x[i] * c; y[i+1] = x[i+1] * c; y[i+2] = x[i+2] * c; y[i+3] = x[i+3] * c; } for(; i < n; i++) y[i] = x[i] * c; } static void THFloatVector_cadd_NEON(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) { long i = 0; for(;i < n-4; i += 4) { z[i] = x[i] + c * y[i]; z[i+1] = x[i+1] + c * y[i+1]; z[i+2] = x[i+2] + c * y[i+2]; z[i+3] = x[i+3] + c * y[i+3]; } for(; i < n; i++) z[i] = x[i] + c * y[i]; } static void THFloatVector_adds_NEON(float *y, const float *x, const float c, const ptrdiff_t n) { long i = 0; for(;i < n-4; i += 4) { y[i] = x[i] + c; y[i+1] = x[i+1] + c; y[i+2] = x[i+2] + c; y[i+3] = x[i+3] + c; } for(; i < n; i++) y[i] = x[i] + c; } static void THFloatVector_cdiv_NEON(float *z, const float *x, const float *y, const ptrdiff_t n) { long i = 0; for(;i < n-4; i += 4) { z[i] = x[i] / y[i]; z[i+1] = x[i+1] / y[i+1]; z[i+2] = x[i+2] / y[i+2]; z[i+3] = x[i+3] / y[i+3]; } for(; i < n; i++) z[i] = x[i] / y[i]; } static void THFloatVector_divs_NEON(float *y, const float *x, const float c, const ptrdiff_t n) { long i = 0; for(;i < n-4; i += 4) { y[i] = x[i] / c; y[i+1] = x[i+1] / c; y[i+2] = x[i+2] / c; y[i+3] = x[i+3] / c; } for(; i < 
n; i++) y[i] = x[i] / c; } lib/TH/vector/SSE.c000066400000000000000000000167251316246254300142600ustar00rootroot00000000000000#ifndef _MSC_VER #include #else #include #endif static void THDoubleVector_fill_SSE(double *x, const double c, const ptrdiff_t n) { ptrdiff_t i; ptrdiff_t off; __m128d XMM0 = _mm_set1_pd(c); for (i=0; i<=((n)-8); i+=8) { _mm_storeu_pd((x)+i , XMM0); _mm_storeu_pd((x)+i+2, XMM0); _mm_storeu_pd((x)+i+4, XMM0); _mm_storeu_pd((x)+i+6, XMM0); } off = (n) - ((n)%8); for (i=0; i<((n)%8); i++) { x[off+i] = c; } } static void THDoubleVector_cadd_SSE(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) { ptrdiff_t i; __m128d XMM7 = _mm_set1_pd(c); __m128d XMM0, XMM2; for (i=0; i<=((n)-2); i+=2) { XMM0 = _mm_loadu_pd((x)+i); XMM2 = _mm_loadu_pd((y)+i); XMM2 = _mm_mul_pd(XMM2, XMM7); XMM2 = _mm_add_pd(XMM0, XMM2); _mm_storeu_pd((z)+i, XMM2); } for (; i<(n); i++) { z[i] = x[i] + c * y[i]; } } static void THDoubleVector_adds_SSE(double *y, const double *x, const double c, const ptrdiff_t n) { ptrdiff_t i; __m128d XMM7 = _mm_set1_pd(c); __m128d XMM0, XMM2; for (i=0; i<=((n)-4); i+=4) { XMM0 = _mm_loadu_pd((x)+i); XMM2 = _mm_loadu_pd((x)+i+2); XMM0 = _mm_add_pd(XMM0, XMM7); XMM2 = _mm_add_pd(XMM2, XMM7); _mm_storeu_pd((y)+i, XMM0); _mm_storeu_pd((y)+i+2, XMM2); } for (; i<(n); i++) { y[i] = x[i] + c; } } static void THDoubleVector_cmul_SSE(double *z, const double *x, const double *y, const ptrdiff_t n) { ptrdiff_t i; for (i=0; i<=((n)-8); i+=8) { __m128d XMM0 = _mm_loadu_pd((x)+i ); __m128d XMM1 = _mm_loadu_pd((x)+i+2); __m128d XMM2 = _mm_loadu_pd((x)+i+4); __m128d XMM3 = _mm_loadu_pd((x)+i+6); __m128d XMM4 = _mm_loadu_pd((y)+i ); __m128d XMM5 = _mm_loadu_pd((y)+i+2); __m128d XMM6 = _mm_loadu_pd((y)+i+4); __m128d XMM7 = _mm_loadu_pd((y)+i+6); XMM4 = _mm_mul_pd(XMM4, XMM0); XMM5 = _mm_mul_pd(XMM5, XMM1); XMM6 = _mm_mul_pd(XMM6, XMM2); XMM7 = _mm_mul_pd(XMM7, XMM3); _mm_storeu_pd((z)+i , XMM4); _mm_storeu_pd((z)+i+2, XMM5); 
_mm_storeu_pd((z)+i+4, XMM6); _mm_storeu_pd((z)+i+6, XMM7); } for (; i<(n); i++) { z[i] = x[i] * y[i]; } } static void THDoubleVector_muls_SSE(double *y, const double *x, const double c, const ptrdiff_t n) { ptrdiff_t i; __m128d XMM15 = _mm_set1_pd(c); for (i=0; i<=((n)-8); i+=8) { __m128d XMM0 = _mm_loadu_pd((x)+i ); __m128d XMM1 = _mm_loadu_pd((x)+i+2); __m128d XMM2 = _mm_loadu_pd((x)+i+4); __m128d XMM3 = _mm_loadu_pd((x)+i+6); __m128d XMM4 = _mm_mul_pd(XMM15, XMM0); __m128d XMM5 = _mm_mul_pd(XMM15, XMM1); __m128d XMM6 = _mm_mul_pd(XMM15, XMM2); __m128d XMM7 = _mm_mul_pd(XMM15, XMM3); _mm_storeu_pd((y)+i , XMM4); _mm_storeu_pd((y)+i+2, XMM5); _mm_storeu_pd((y)+i+4, XMM6); _mm_storeu_pd((y)+i+6, XMM7); } for (; i<(n); i++) { y[i] = x[i] * c; } } static void THDoubleVector_cdiv_SSE(double *z, const double *x, const double *y, const ptrdiff_t n) { ptrdiff_t i; __m128d XMM0, XMM1, XMM2, XMM3; for (i=0; i<=((n)-4); i+=4) { XMM0 = _mm_loadu_pd(x+i); XMM1 = _mm_loadu_pd(x+i+2); XMM2 = _mm_loadu_pd(y+i); XMM3 = _mm_loadu_pd(y+i+2); XMM2 = _mm_div_pd(XMM0, XMM2); XMM3 = _mm_div_pd(XMM1, XMM3); _mm_storeu_pd(z+i, XMM2); _mm_storeu_pd(z+i+2, XMM3); } for (; i<(n); i++) { z[i] = x[i] / y[i]; } } static void THDoubleVector_divs_SSE(double *y, const double *x, const double c, const ptrdiff_t n) { ptrdiff_t i; __m128d XMM7 = _mm_set1_pd(c); __m128d XMM0, XMM1; for (i=0; i<=((n)-4); i+=4) { XMM0 = _mm_loadu_pd(x+i); XMM1 = _mm_loadu_pd(x+i+2); XMM0 = _mm_div_pd(XMM0, XMM7); XMM1 = _mm_div_pd(XMM1, XMM7); _mm_storeu_pd(y+i, XMM0); _mm_storeu_pd(y+i+2, XMM1); } for (; i<(n); i++) { y[i] = x[i] / c; } } static void THFloatVector_fill_SSE(float *x, const float c, const ptrdiff_t n) { ptrdiff_t i; __m128 XMM0 = _mm_set_ps1(c); ptrdiff_t off; for (i=0; i<=((n)-16); i+=16) { _mm_storeu_ps((x)+i , XMM0); _mm_storeu_ps((x)+i+4, XMM0); _mm_storeu_ps((x)+i+8, XMM0); _mm_storeu_ps((x)+i+12, XMM0); } off = (n) - ((n)%16); for (i=0; i<((n)%16); i++) { x[off+i] = c; } } static void 
THFloatVector_cadd_SSE(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) { ptrdiff_t i; __m128 XMM7 = _mm_set_ps1(c); __m128 XMM0, XMM2; for (i=0; i<=((n)-4); i+=4) { XMM0 = _mm_loadu_ps((x)+i); XMM2 = _mm_loadu_ps((y)+i); XMM2 = _mm_mul_ps(XMM2, XMM7); XMM2 = _mm_add_ps(XMM0, XMM2); _mm_storeu_ps((z)+i, XMM2); } for (; i<(n); i++) { z[i] = x[i] + c * y[i]; } } static void THFloatVector_adds_SSE(float *y, const float *x, const float c, const ptrdiff_t n) { ptrdiff_t i; __m128 XMM7 = _mm_set1_ps(c); __m128 XMM0, XMM2; for (i=0; i<=((n)-8); i+=8) { XMM0 = _mm_loadu_ps((x)+i); XMM2 = _mm_loadu_ps((x)+i+4); XMM0 = _mm_add_ps(XMM0, XMM7); XMM2 = _mm_add_ps(XMM2, XMM7); _mm_storeu_ps((y)+i, XMM0); _mm_storeu_ps((y)+i+4, XMM2); } for (; i<(n); i++) { y[i] = x[i] + c; } } static void THFloatVector_cmul_SSE(float *z, const float *x, const float *y, const ptrdiff_t n) { ptrdiff_t i; for (i=0; i<=((n)-16); i+=16) { __m128 XMM0 = _mm_loadu_ps((x)+i ); __m128 XMM1 = _mm_loadu_ps((x)+i+ 4); __m128 XMM2 = _mm_loadu_ps((x)+i+ 8); __m128 XMM3 = _mm_loadu_ps((x)+i+12); __m128 XMM4 = _mm_loadu_ps((y)+i ); __m128 XMM5 = _mm_loadu_ps((y)+i+ 4); __m128 XMM6 = _mm_loadu_ps((y)+i+ 8); __m128 XMM7 = _mm_loadu_ps((y)+i+12); XMM4 = _mm_mul_ps(XMM4, XMM0); XMM5 = _mm_mul_ps(XMM5, XMM1); XMM6 = _mm_mul_ps(XMM6, XMM2); XMM7 = _mm_mul_ps(XMM7, XMM3); _mm_storeu_ps((z)+i , XMM4); _mm_storeu_ps((z)+i+ 4, XMM5); _mm_storeu_ps((z)+i+ 8, XMM6); _mm_storeu_ps((z)+i+12, XMM7); } for (; i<(n); i++) { z[i] = x[i] * y[i]; } } static void THFloatVector_muls_SSE(float *y, const float *x, const float c, const ptrdiff_t n) { ptrdiff_t i; __m128 XMM15 = _mm_set_ps1(c); for (i=0; i<=((n)-16); i+=16) { __m128 XMM0 = _mm_loadu_ps((x)+i ); __m128 XMM1 = _mm_loadu_ps((x)+i+ 4); __m128 XMM2 = _mm_loadu_ps((x)+i+ 8); __m128 XMM3 = _mm_loadu_ps((x)+i+12); __m128 XMM4 = _mm_mul_ps(XMM15, XMM0); __m128 XMM5 = _mm_mul_ps(XMM15, XMM1); __m128 XMM6 = _mm_mul_ps(XMM15, XMM2); __m128 XMM7 = 
_mm_mul_ps(XMM15, XMM3); _mm_storeu_ps((y)+i , XMM4); _mm_storeu_ps((y)+i+ 4, XMM5); _mm_storeu_ps((y)+i+ 8, XMM6); _mm_storeu_ps((y)+i+12, XMM7); } for (; i<(n); i++) { y[i] = x[i] * c; } } static void THFloatVector_cdiv_SSE(float *z, const float *x, const float *y, const ptrdiff_t n) { ptrdiff_t i; __m128 XMM0, XMM1, XMM2, XMM3; for (i=0; i<=((n)-8); i+=8) { XMM0 = _mm_loadu_ps(x+i); XMM1 = _mm_loadu_ps(x+i+4); XMM2 = _mm_loadu_ps(y+i); XMM3 = _mm_loadu_ps(y+i+4); XMM2 = _mm_div_ps(XMM0, XMM2); XMM3 = _mm_div_ps(XMM1, XMM3); _mm_storeu_ps(z+i, XMM2); _mm_storeu_ps(z+i+4, XMM3); } for (; i<(n); i++) { z[i] = x[i] / y[i]; } } static void THFloatVector_divs_SSE(float *y, const float *x, const float c, const ptrdiff_t n) { ptrdiff_t i; __m128 XMM7 = _mm_set1_ps(c); __m128 XMM0, XMM1; for (i=0; i<=((n)-8); i+=8) { XMM0 = _mm_loadu_ps(x+i); XMM1 = _mm_loadu_ps(x+i+4); XMM0 = _mm_div_ps(XMM0, XMM7); XMM1 = _mm_div_ps(XMM1, XMM7); _mm_storeu_ps(y+i, XMM0); _mm_storeu_ps(y+i+4, XMM1); } for (; i<(n); i++) { y[i] = x[i] / c; } } lib/TH/vector/VSX.c000066400000000000000000003124001316246254300142730ustar00rootroot00000000000000#ifdef __PPC64__ #include #include //-------------------------------------------------------------------------------------------------- // THDoubleVector_fill_VSX: //-------------------------------------------------------------------------------------------------- static void THDoubleVector_fill_VSX(double *x, const double c, const ptrdiff_t n) { ptrdiff_t i; double val[2] = {c, c}; vector double fp64vec2 = vec_xl(0, val); for (i = 0; i <= n-128; i += 128) { vec_xst(fp64vec2, 0, x+(i )); vec_xst(fp64vec2, 0, x+(i+2 )); vec_xst(fp64vec2, 0, x+(i+4 )); vec_xst(fp64vec2, 0, x+(i+6 )); vec_xst(fp64vec2, 0, x+(i+8 )); vec_xst(fp64vec2, 0, x+(i+10 )); vec_xst(fp64vec2, 0, x+(i+12 )); vec_xst(fp64vec2, 0, x+(i+14 )); vec_xst(fp64vec2, 0, x+(i+16 )); vec_xst(fp64vec2, 0, x+(i+18 )); vec_xst(fp64vec2, 0, x+(i+20 )); vec_xst(fp64vec2, 0, x+(i+22 )); 
vec_xst(fp64vec2, 0, x+(i+24 )); vec_xst(fp64vec2, 0, x+(i+26 )); vec_xst(fp64vec2, 0, x+(i+28 )); vec_xst(fp64vec2, 0, x+(i+30 )); vec_xst(fp64vec2, 0, x+(i+32 )); vec_xst(fp64vec2, 0, x+(i+34 )); vec_xst(fp64vec2, 0, x+(i+36 )); vec_xst(fp64vec2, 0, x+(i+38 )); vec_xst(fp64vec2, 0, x+(i+40 )); vec_xst(fp64vec2, 0, x+(i+42 )); vec_xst(fp64vec2, 0, x+(i+44 )); vec_xst(fp64vec2, 0, x+(i+46 )); vec_xst(fp64vec2, 0, x+(i+48 )); vec_xst(fp64vec2, 0, x+(i+50 )); vec_xst(fp64vec2, 0, x+(i+52 )); vec_xst(fp64vec2, 0, x+(i+54 )); vec_xst(fp64vec2, 0, x+(i+56 )); vec_xst(fp64vec2, 0, x+(i+58 )); vec_xst(fp64vec2, 0, x+(i+60 )); vec_xst(fp64vec2, 0, x+(i+62 )); vec_xst(fp64vec2, 0, x+(i+64 )); vec_xst(fp64vec2, 0, x+(i+66 )); vec_xst(fp64vec2, 0, x+(i+68 )); vec_xst(fp64vec2, 0, x+(i+70 )); vec_xst(fp64vec2, 0, x+(i+72 )); vec_xst(fp64vec2, 0, x+(i+74 )); vec_xst(fp64vec2, 0, x+(i+76 )); vec_xst(fp64vec2, 0, x+(i+78 )); vec_xst(fp64vec2, 0, x+(i+80 )); vec_xst(fp64vec2, 0, x+(i+82 )); vec_xst(fp64vec2, 0, x+(i+84 )); vec_xst(fp64vec2, 0, x+(i+86 )); vec_xst(fp64vec2, 0, x+(i+88 )); vec_xst(fp64vec2, 0, x+(i+90 )); vec_xst(fp64vec2, 0, x+(i+92 )); vec_xst(fp64vec2, 0, x+(i+94 )); vec_xst(fp64vec2, 0, x+(i+96 )); vec_xst(fp64vec2, 0, x+(i+98 )); vec_xst(fp64vec2, 0, x+(i+100)); vec_xst(fp64vec2, 0, x+(i+102)); vec_xst(fp64vec2, 0, x+(i+104)); vec_xst(fp64vec2, 0, x+(i+106)); vec_xst(fp64vec2, 0, x+(i+108)); vec_xst(fp64vec2, 0, x+(i+110)); vec_xst(fp64vec2, 0, x+(i+112)); vec_xst(fp64vec2, 0, x+(i+114)); vec_xst(fp64vec2, 0, x+(i+116)); vec_xst(fp64vec2, 0, x+(i+118)); vec_xst(fp64vec2, 0, x+(i+120)); vec_xst(fp64vec2, 0, x+(i+122)); vec_xst(fp64vec2, 0, x+(i+124)); vec_xst(fp64vec2, 0, x+(i+126)); } for (; i <= n-16; i += 16) { vec_xst(fp64vec2, 0, x+(i )); vec_xst(fp64vec2, 0, x+(i+2 )); vec_xst(fp64vec2, 0, x+(i+4 )); vec_xst(fp64vec2, 0, x+(i+6 )); vec_xst(fp64vec2, 0, x+(i+8 )); vec_xst(fp64vec2, 0, x+(i+10 )); vec_xst(fp64vec2, 0, x+(i+12 )); vec_xst(fp64vec2, 0, x+(i+14 
)); } for (; i <= n-2; i += 2) vec_xst(fp64vec2, 0, x+(i )); for (; i < n; i++) x[i] = c; } //-------------------------------------------------------------------------------------------------- // THDoubleVector_cadds_VSX: //-------------------------------------------------------------------------------------------------- static void THDoubleVector_cadd_VSX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) { ptrdiff_t i; double val[2] = {c, c}; vector double c_fp64vec2 = vec_xl(0, val); vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2; vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2; vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2; vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2; for (i = 0; i <= n-24; i += 24) { y0_fp64vec2 = vec_xl(0, y+(i )); y1_fp64vec2 = vec_xl(0, y+(i+2 )); y2_fp64vec2 = vec_xl(0, y+(i+4 )); y3_fp64vec2 = vec_xl(0, y+(i+6 )); y4_fp64vec2 = vec_xl(0, y+(i+8 )); y5_fp64vec2 = vec_xl(0, y+(i+10)); y6_fp64vec2 = vec_xl(0, y+(i+12)); y7_fp64vec2 = vec_xl(0, y+(i+14)); y8_fp64vec2 = vec_xl(0, y+(i+16)); y9_fp64vec2 = vec_xl(0, y+(i+18)); y10_fp64vec2 = vec_xl(0, y+(i+20)); y11_fp64vec2 = vec_xl(0, y+(i+22)); x0_fp64vec2 = vec_xl(0, x+(i )); x1_fp64vec2 = vec_xl(0, x+(i+2 )); x2_fp64vec2 = vec_xl(0, x+(i+4 )); x3_fp64vec2 = vec_xl(0, x+(i+6 )); x4_fp64vec2 = vec_xl(0, x+(i+8 )); x5_fp64vec2 = vec_xl(0, x+(i+10)); x6_fp64vec2 = vec_xl(0, x+(i+12)); x7_fp64vec2 = vec_xl(0, x+(i+14)); x8_fp64vec2 = vec_xl(0, x+(i+16)); x9_fp64vec2 = vec_xl(0, x+(i+18)); x10_fp64vec2 = vec_xl(0, x+(i+20)); x11_fp64vec2 = vec_xl(0, x+(i+22)); y0_fp64vec2 = vec_madd(y0_fp64vec2, c_fp64vec2, x0_fp64vec2); y1_fp64vec2 = vec_madd(y1_fp64vec2, c_fp64vec2, x1_fp64vec2); y2_fp64vec2 = vec_madd(y2_fp64vec2, c_fp64vec2, x2_fp64vec2); y3_fp64vec2 = vec_madd(y3_fp64vec2, 
c_fp64vec2, x3_fp64vec2); y4_fp64vec2 = vec_madd(y4_fp64vec2, c_fp64vec2, x4_fp64vec2); y5_fp64vec2 = vec_madd(y5_fp64vec2, c_fp64vec2, x5_fp64vec2); y6_fp64vec2 = vec_madd(y6_fp64vec2, c_fp64vec2, x6_fp64vec2); y7_fp64vec2 = vec_madd(y7_fp64vec2, c_fp64vec2, x7_fp64vec2); y8_fp64vec2 = vec_madd(y8_fp64vec2, c_fp64vec2, x8_fp64vec2); y9_fp64vec2 = vec_madd(y9_fp64vec2, c_fp64vec2, x9_fp64vec2); y10_fp64vec2 = vec_madd(y10_fp64vec2, c_fp64vec2,x10_fp64vec2); y11_fp64vec2 = vec_madd(y11_fp64vec2, c_fp64vec2,x11_fp64vec2); vec_xst(y0_fp64vec2, 0, z+(i )); vec_xst(y1_fp64vec2, 0, z+(i+2 )); vec_xst(y2_fp64vec2, 0, z+(i+4 )); vec_xst(y3_fp64vec2, 0, z+(i+6 )); vec_xst(y4_fp64vec2, 0, z+(i+8 )); vec_xst(y5_fp64vec2, 0, z+(i+10)); vec_xst(y6_fp64vec2, 0, z+(i+12)); vec_xst(y7_fp64vec2, 0, z+(i+14)); vec_xst(y8_fp64vec2, 0, z+(i+16)); vec_xst(y9_fp64vec2, 0, z+(i+18)); vec_xst(y10_fp64vec2, 0, z+(i+20)); vec_xst(y11_fp64vec2, 0, z+(i+22)); } for (; i <= n-8; i += 8) { y0_fp64vec2 = vec_xl(0, y+(i )); y1_fp64vec2 = vec_xl(0, y+(i+2 )); y2_fp64vec2 = vec_xl(0, y+(i+4 )); y3_fp64vec2 = vec_xl(0, y+(i+6 )); x0_fp64vec2 = vec_xl(0, x+(i )); x1_fp64vec2 = vec_xl(0, x+(i+2 )); x2_fp64vec2 = vec_xl(0, x+(i+4 )); x3_fp64vec2 = vec_xl(0, x+(i+6 )); y0_fp64vec2 = vec_madd(y0_fp64vec2, c_fp64vec2, x0_fp64vec2); y1_fp64vec2 = vec_madd(y1_fp64vec2, c_fp64vec2, x1_fp64vec2); y2_fp64vec2 = vec_madd(y2_fp64vec2, c_fp64vec2, x2_fp64vec2); y3_fp64vec2 = vec_madd(y3_fp64vec2, c_fp64vec2, x3_fp64vec2); vec_xst(y0_fp64vec2, 0, z+(i )); vec_xst(y1_fp64vec2, 0, z+(i+2 )); vec_xst(y2_fp64vec2, 0, z+(i+4 )); vec_xst(y3_fp64vec2, 0, z+(i+6 )); } for (; i <= n-2; i += 2) { y0_fp64vec2 = vec_xl(0, y+(i )); x0_fp64vec2 = vec_xl(0, x+(i )); y0_fp64vec2 = vec_madd(y0_fp64vec2, c_fp64vec2, x0_fp64vec2); vec_xst(y0_fp64vec2, 0, z+(i )); } for (; i < n; i++) z[i] = x[i] + c* y[i]; } //-------------------------------------------------------------------------------------------------- // 
THDoubleVector_adds_VSX: //-------------------------------------------------------------------------------------------------- static void THDoubleVector_adds_VSX(double *y, const double *x, const double c, const ptrdiff_t n) { ptrdiff_t i; double val[2] = {c, c}; vector double c_fp64vec2 = vec_xl(0, val); vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2; vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2; vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2; vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2; for (i = 0; i <= n-24; i += 24) { x0_fp64vec2 = vec_xl(0, x+(i )); x1_fp64vec2 = vec_xl(0, x+(i+2 )); x2_fp64vec2 = vec_xl(0, x+(i+4 )); x3_fp64vec2 = vec_xl(0, x+(i+6 )); x4_fp64vec2 = vec_xl(0, x+(i+8 )); x5_fp64vec2 = vec_xl(0, x+(i+10)); x6_fp64vec2 = vec_xl(0, x+(i+12)); x7_fp64vec2 = vec_xl(0, x+(i+14)); x8_fp64vec2 = vec_xl(0, x+(i+16)); x9_fp64vec2 = vec_xl(0, x+(i+18)); x10_fp64vec2 = vec_xl(0, x+(i+20)); x11_fp64vec2 = vec_xl(0, x+(i+22)); y0_fp64vec2 = vec_add(x0_fp64vec2, c_fp64vec2); y1_fp64vec2 = vec_add(x1_fp64vec2, c_fp64vec2); y2_fp64vec2 = vec_add(x2_fp64vec2, c_fp64vec2); y3_fp64vec2 = vec_add(x3_fp64vec2, c_fp64vec2); y4_fp64vec2 = vec_add(x4_fp64vec2, c_fp64vec2); y5_fp64vec2 = vec_add(x5_fp64vec2, c_fp64vec2); y6_fp64vec2 = vec_add(x6_fp64vec2, c_fp64vec2); y7_fp64vec2 = vec_add(x7_fp64vec2, c_fp64vec2); y8_fp64vec2 = vec_add(x8_fp64vec2, c_fp64vec2); y9_fp64vec2 = vec_add(x9_fp64vec2, c_fp64vec2); y10_fp64vec2 = vec_add(x10_fp64vec2, c_fp64vec2); y11_fp64vec2 = vec_add(x11_fp64vec2, c_fp64vec2); vec_xst(y0_fp64vec2, 0, y+(i )); vec_xst(y1_fp64vec2, 0, y+(i+2 )); vec_xst(y2_fp64vec2, 0, y+(i+4 )); vec_xst(y3_fp64vec2, 0, y+(i+6 )); vec_xst(y4_fp64vec2, 0, y+(i+8 )); vec_xst(y5_fp64vec2, 0, y+(i+10)); vec_xst(y6_fp64vec2, 0, y+(i+12)); vec_xst(y7_fp64vec2, 0, y+(i+14)); 
vec_xst(y8_fp64vec2, 0, y+(i+16)); vec_xst(y9_fp64vec2, 0, y+(i+18)); vec_xst(y10_fp64vec2, 0, y+(i+20)); vec_xst(y11_fp64vec2, 0, y+(i+22)); } for (; i <= n-8; i += 8) { x0_fp64vec2 = vec_xl(0, x+(i )); x1_fp64vec2 = vec_xl(0, x+(i+2 )); x2_fp64vec2 = vec_xl(0, x+(i+4 )); x3_fp64vec2 = vec_xl(0, x+(i+6 )); y0_fp64vec2 = vec_add(x0_fp64vec2, c_fp64vec2); y1_fp64vec2 = vec_add(x1_fp64vec2, c_fp64vec2); y2_fp64vec2 = vec_add(x2_fp64vec2, c_fp64vec2); y3_fp64vec2 = vec_add(x3_fp64vec2, c_fp64vec2); vec_xst(y0_fp64vec2, 0, y+(i )); vec_xst(y1_fp64vec2, 0, y+(i+2 )); vec_xst(y2_fp64vec2, 0, y+(i+4 )); vec_xst(y3_fp64vec2, 0, y+(i+6 )); } for (; i <= n-2; i += 2) { x0_fp64vec2 = vec_xl(0, x+(i )); y0_fp64vec2 = vec_add(x0_fp64vec2, c_fp64vec2); vec_xst(y0_fp64vec2, 0, y+(i )); } for (; i < n; i++) y[i] = x[i] +c; } //-------------------------------------------------------------------------------------------------- // THDoubleVector_cmul_VSX: //-------------------------------------------------------------------------------------------------- static void THDoubleVector_cmul_VSX(double *z, const double *x, const double *y, const ptrdiff_t n) { ptrdiff_t i; vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2; vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2; vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2; vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2; for (i = 0; i <= n-24; i += 24) { y0_fp64vec2 = vec_xl(0, y+(i )); y1_fp64vec2 = vec_xl(0, y+(i+2 )); y2_fp64vec2 = vec_xl(0, y+(i+4 )); y3_fp64vec2 = vec_xl(0, y+(i+6 )); y4_fp64vec2 = vec_xl(0, y+(i+8 )); y5_fp64vec2 = vec_xl(0, y+(i+10)); y6_fp64vec2 = vec_xl(0, y+(i+12)); y7_fp64vec2 = vec_xl(0, y+(i+14)); y8_fp64vec2 = vec_xl(0, y+(i+16)); y9_fp64vec2 = vec_xl(0, y+(i+18)); y10_fp64vec2 = vec_xl(0, y+(i+20)); y11_fp64vec2 = vec_xl(0, y+(i+22)); 
x0_fp64vec2 = vec_xl(0, x+(i )); x1_fp64vec2 = vec_xl(0, x+(i+2 )); x2_fp64vec2 = vec_xl(0, x+(i+4 )); x3_fp64vec2 = vec_xl(0, x+(i+6 )); x4_fp64vec2 = vec_xl(0, x+(i+8 )); x5_fp64vec2 = vec_xl(0, x+(i+10)); x6_fp64vec2 = vec_xl(0, x+(i+12)); x7_fp64vec2 = vec_xl(0, x+(i+14)); x8_fp64vec2 = vec_xl(0, x+(i+16)); x9_fp64vec2 = vec_xl(0, x+(i+18)); x10_fp64vec2 = vec_xl(0, x+(i+20)); x11_fp64vec2 = vec_xl(0, x+(i+22)); y0_fp64vec2 = vec_mul(y0_fp64vec2, x0_fp64vec2); y1_fp64vec2 = vec_mul(y1_fp64vec2, x1_fp64vec2); y2_fp64vec2 = vec_mul(y2_fp64vec2, x2_fp64vec2); y3_fp64vec2 = vec_mul(y3_fp64vec2, x3_fp64vec2); y4_fp64vec2 = vec_mul(y4_fp64vec2, x4_fp64vec2); y5_fp64vec2 = vec_mul(y5_fp64vec2, x5_fp64vec2); y6_fp64vec2 = vec_mul(y6_fp64vec2, x6_fp64vec2); y7_fp64vec2 = vec_mul(y7_fp64vec2, x7_fp64vec2); y8_fp64vec2 = vec_mul(y8_fp64vec2, x8_fp64vec2); y9_fp64vec2 = vec_mul(y9_fp64vec2, x9_fp64vec2); y10_fp64vec2 = vec_mul(y10_fp64vec2, x10_fp64vec2); y11_fp64vec2 = vec_mul(y11_fp64vec2, x11_fp64vec2); vec_xst(y0_fp64vec2, 0, z+(i )); vec_xst(y1_fp64vec2, 0, z+(i+2 )); vec_xst(y2_fp64vec2, 0, z+(i+4 )); vec_xst(y3_fp64vec2, 0, z+(i+6 )); vec_xst(y4_fp64vec2, 0, z+(i+8 )); vec_xst(y5_fp64vec2, 0, z+(i+10)); vec_xst(y6_fp64vec2, 0, z+(i+12)); vec_xst(y7_fp64vec2, 0, z+(i+14)); vec_xst(y8_fp64vec2, 0, z+(i+16)); vec_xst(y9_fp64vec2, 0, z+(i+18)); vec_xst(y10_fp64vec2, 0, z+(i+20)); vec_xst(y11_fp64vec2, 0, z+(i+22)); } for (; i <= n-8; i += 8) { y0_fp64vec2 = vec_xl(0, y+(i )); y1_fp64vec2 = vec_xl(0, y+(i+2 )); y2_fp64vec2 = vec_xl(0, y+(i+4 )); y3_fp64vec2 = vec_xl(0, y+(i+6 )); x0_fp64vec2 = vec_xl(0, x+(i )); x1_fp64vec2 = vec_xl(0, x+(i+2 )); x2_fp64vec2 = vec_xl(0, x+(i+4 )); x3_fp64vec2 = vec_xl(0, x+(i+6 )); y0_fp64vec2 = vec_mul(y0_fp64vec2, x0_fp64vec2); y1_fp64vec2 = vec_mul(y1_fp64vec2, x1_fp64vec2); y2_fp64vec2 = vec_mul(y2_fp64vec2, x2_fp64vec2); y3_fp64vec2 = vec_mul(y3_fp64vec2, x3_fp64vec2); vec_xst(y0_fp64vec2, 0, z+(i )); vec_xst(y1_fp64vec2, 0, z+(i+2 
)); vec_xst(y2_fp64vec2, 0, z+(i+4 )); vec_xst(y3_fp64vec2, 0, z+(i+6 )); } for (; i <= n-2; i += 2) { y0_fp64vec2 = vec_xl(0, y+(i )); x0_fp64vec2 = vec_xl(0, x+(i )); y0_fp64vec2 = vec_mul(y0_fp64vec2, x0_fp64vec2); vec_xst(y0_fp64vec2, 0, z+(i )); } for (; i < n; i++) z[i] = x[i] * y[i]; } //-------------------------------------------------------------------------------------------------- // THDoubleVector_muls_VSX: //-------------------------------------------------------------------------------------------------- static void THDoubleVector_muls_VSX(double *y, const double *x, const double c, const ptrdiff_t n) { ptrdiff_t i; double val[2] = {c, c}; vector double c_fp64vec2 = vec_xl(0, val); vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2; vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2; vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2; vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2; for (i = 0; i <= n-24; i += 24) { x0_fp64vec2 = vec_xl(0, x+(i )); x1_fp64vec2 = vec_xl(0, x+(i+2 )); x2_fp64vec2 = vec_xl(0, x+(i+4 )); x3_fp64vec2 = vec_xl(0, x+(i+6 )); x4_fp64vec2 = vec_xl(0, x+(i+8 )); x5_fp64vec2 = vec_xl(0, x+(i+10)); x6_fp64vec2 = vec_xl(0, x+(i+12)); x7_fp64vec2 = vec_xl(0, x+(i+14)); x8_fp64vec2 = vec_xl(0, x+(i+16)); x9_fp64vec2 = vec_xl(0, x+(i+18)); x10_fp64vec2 = vec_xl(0, x+(i+20)); x11_fp64vec2 = vec_xl(0, x+(i+22)); y0_fp64vec2 = vec_mul(x0_fp64vec2, c_fp64vec2); y1_fp64vec2 = vec_mul(x1_fp64vec2, c_fp64vec2); y2_fp64vec2 = vec_mul(x2_fp64vec2, c_fp64vec2); y3_fp64vec2 = vec_mul(x3_fp64vec2, c_fp64vec2); y4_fp64vec2 = vec_mul(x4_fp64vec2, c_fp64vec2); y5_fp64vec2 = vec_mul(x5_fp64vec2, c_fp64vec2); y6_fp64vec2 = vec_mul(x6_fp64vec2, c_fp64vec2); y7_fp64vec2 = vec_mul(x7_fp64vec2, c_fp64vec2); y8_fp64vec2 = vec_mul(x8_fp64vec2, c_fp64vec2); y9_fp64vec2 = vec_mul(x9_fp64vec2, 
c_fp64vec2); y10_fp64vec2 = vec_mul(x10_fp64vec2, c_fp64vec2); y11_fp64vec2 = vec_mul(x11_fp64vec2, c_fp64vec2); vec_xst(y0_fp64vec2, 0, y+(i )); vec_xst(y1_fp64vec2, 0, y+(i+2 )); vec_xst(y2_fp64vec2, 0, y+(i+4 )); vec_xst(y3_fp64vec2, 0, y+(i+6 )); vec_xst(y4_fp64vec2, 0, y+(i+8 )); vec_xst(y5_fp64vec2, 0, y+(i+10)); vec_xst(y6_fp64vec2, 0, y+(i+12)); vec_xst(y7_fp64vec2, 0, y+(i+14)); vec_xst(y8_fp64vec2, 0, y+(i+16)); vec_xst(y9_fp64vec2, 0, y+(i+18)); vec_xst(y10_fp64vec2, 0, y+(i+20)); vec_xst(y11_fp64vec2, 0, y+(i+22)); } for (; i <= n-8; i += 8) { x0_fp64vec2 = vec_xl(0, x+(i )); x1_fp64vec2 = vec_xl(0, x+(i+2 )); x2_fp64vec2 = vec_xl(0, x+(i+4 )); x3_fp64vec2 = vec_xl(0, x+(i+6 )); y0_fp64vec2 = vec_mul(x0_fp64vec2, c_fp64vec2); y1_fp64vec2 = vec_mul(x1_fp64vec2, c_fp64vec2); y2_fp64vec2 = vec_mul(x2_fp64vec2, c_fp64vec2); y3_fp64vec2 = vec_mul(x3_fp64vec2, c_fp64vec2); vec_xst(y0_fp64vec2, 0, y+(i )); vec_xst(y1_fp64vec2, 0, y+(i+2 )); vec_xst(y2_fp64vec2, 0, y+(i+4 )); vec_xst(y3_fp64vec2, 0, y+(i+6 )); } for (; i <= n-2; i += 2) { x0_fp64vec2 = vec_xl(0, x+(i )); y0_fp64vec2 = vec_mul(x0_fp64vec2, c_fp64vec2); vec_xst(y0_fp64vec2, 0, y+(i )); } for (; i < n; i++) y[i] = c * x[i]; } //-------------------------------------------------------------------------------------------------- // THDoubleVector_cdiv_VSX: //-------------------------------------------------------------------------------------------------- static void THDoubleVector_cdiv_VSX(double *z, const double *x, const double *y, const ptrdiff_t n) { ptrdiff_t i; vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2; vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2; vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2; vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2; for (i = 0; i <= n-24; i += 24) { y0_fp64vec2 = vec_xl(0, y+(i )); 
y1_fp64vec2 = vec_xl(0, y+(i+2 )); y2_fp64vec2 = vec_xl(0, y+(i+4 )); y3_fp64vec2 = vec_xl(0, y+(i+6 )); y4_fp64vec2 = vec_xl(0, y+(i+8 )); y5_fp64vec2 = vec_xl(0, y+(i+10)); y6_fp64vec2 = vec_xl(0, y+(i+12)); y7_fp64vec2 = vec_xl(0, y+(i+14)); y8_fp64vec2 = vec_xl(0, y+(i+16)); y9_fp64vec2 = vec_xl(0, y+(i+18)); y10_fp64vec2 = vec_xl(0, y+(i+20)); y11_fp64vec2 = vec_xl(0, y+(i+22)); x0_fp64vec2 = vec_xl(0, x+(i )); x1_fp64vec2 = vec_xl(0, x+(i+2 )); x2_fp64vec2 = vec_xl(0, x+(i+4 )); x3_fp64vec2 = vec_xl(0, x+(i+6 )); x4_fp64vec2 = vec_xl(0, x+(i+8 )); x5_fp64vec2 = vec_xl(0, x+(i+10)); x6_fp64vec2 = vec_xl(0, x+(i+12)); x7_fp64vec2 = vec_xl(0, x+(i+14)); x8_fp64vec2 = vec_xl(0, x+(i+16)); x9_fp64vec2 = vec_xl(0, x+(i+18)); x10_fp64vec2 = vec_xl(0, x+(i+20)); x11_fp64vec2 = vec_xl(0, x+(i+22)); y0_fp64vec2 = vec_div(x0_fp64vec2, y0_fp64vec2); y1_fp64vec2 = vec_div(x1_fp64vec2, y1_fp64vec2); y2_fp64vec2 = vec_div(x2_fp64vec2, y2_fp64vec2); y3_fp64vec2 = vec_div(x3_fp64vec2, y3_fp64vec2); y4_fp64vec2 = vec_div(x4_fp64vec2, y4_fp64vec2); y5_fp64vec2 = vec_div(x5_fp64vec2, y5_fp64vec2); y6_fp64vec2 = vec_div(x6_fp64vec2, y6_fp64vec2); y7_fp64vec2 = vec_div(x7_fp64vec2, y7_fp64vec2); y8_fp64vec2 = vec_div(x8_fp64vec2, y8_fp64vec2); y9_fp64vec2 = vec_div(x9_fp64vec2, y9_fp64vec2); y10_fp64vec2 = vec_div(x10_fp64vec2, y10_fp64vec2); y11_fp64vec2 = vec_div(x11_fp64vec2, y11_fp64vec2); vec_xst(y0_fp64vec2, 0, z+(i )); vec_xst(y1_fp64vec2, 0, z+(i+2 )); vec_xst(y2_fp64vec2, 0, z+(i+4 )); vec_xst(y3_fp64vec2, 0, z+(i+6 )); vec_xst(y4_fp64vec2, 0, z+(i+8 )); vec_xst(y5_fp64vec2, 0, z+(i+10)); vec_xst(y6_fp64vec2, 0, z+(i+12)); vec_xst(y7_fp64vec2, 0, z+(i+14)); vec_xst(y8_fp64vec2, 0, z+(i+16)); vec_xst(y9_fp64vec2, 0, z+(i+18)); vec_xst(y10_fp64vec2, 0, z+(i+20)); vec_xst(y11_fp64vec2, 0, z+(i+22)); } for (; i <= n-8; i += 8) { y0_fp64vec2 = vec_xl(0, y+(i )); y1_fp64vec2 = vec_xl(0, y+(i+2 )); y2_fp64vec2 = vec_xl(0, y+(i+4 )); y3_fp64vec2 = vec_xl(0, y+(i+6 )); x0_fp64vec2 
= vec_xl(0, x+(i )); x1_fp64vec2 = vec_xl(0, x+(i+2 )); x2_fp64vec2 = vec_xl(0, x+(i+4 )); x3_fp64vec2 = vec_xl(0, x+(i+6 )); y0_fp64vec2 = vec_div(x0_fp64vec2, y0_fp64vec2); y1_fp64vec2 = vec_div(x1_fp64vec2, y1_fp64vec2); y2_fp64vec2 = vec_div(x2_fp64vec2, y2_fp64vec2); y3_fp64vec2 = vec_div(x3_fp64vec2, y3_fp64vec2); vec_xst(y0_fp64vec2, 0, z+(i )); vec_xst(y1_fp64vec2, 0, z+(i+2 )); vec_xst(y2_fp64vec2, 0, z+(i+4 )); vec_xst(y3_fp64vec2, 0, z+(i+6 )); } for (; i <= n-2; i += 2) { y0_fp64vec2 = vec_xl(0, y+(i )); x0_fp64vec2 = vec_xl(0, x+(i )); y0_fp64vec2 = vec_div(x0_fp64vec2, y0_fp64vec2); vec_xst(y0_fp64vec2, 0, z+(i )); } for (; i < n; i++) z[i] = x[i] / y[i]; } //-------------------------------------------------------------------------------------------------- // THDoubleVector_divs_VSX: //-------------------------------------------------------------------------------------------------- static void THDoubleVector_divs_VSX(double *y, const double *x, const double c, const ptrdiff_t n) { ptrdiff_t i; double val[2] = {c, c}; vector double c_fp64vec2 = vec_xl(0, val); vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2; vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2; vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2; vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2; for (i = 0; i <= n-24; i += 24) { x0_fp64vec2 = vec_xl(0, x+(i )); x1_fp64vec2 = vec_xl(0, x+(i+2 )); x2_fp64vec2 = vec_xl(0, x+(i+4 )); x3_fp64vec2 = vec_xl(0, x+(i+6 )); x4_fp64vec2 = vec_xl(0, x+(i+8 )); x5_fp64vec2 = vec_xl(0, x+(i+10)); x6_fp64vec2 = vec_xl(0, x+(i+12)); x7_fp64vec2 = vec_xl(0, x+(i+14)); x8_fp64vec2 = vec_xl(0, x+(i+16)); x9_fp64vec2 = vec_xl(0, x+(i+18)); x10_fp64vec2 = vec_xl(0, x+(i+20)); x11_fp64vec2 = vec_xl(0, x+(i+22)); y0_fp64vec2 = vec_div(x0_fp64vec2, c_fp64vec2); y1_fp64vec2 = vec_div(x1_fp64vec2, 
c_fp64vec2); y2_fp64vec2 = vec_div(x2_fp64vec2, c_fp64vec2); y3_fp64vec2 = vec_div(x3_fp64vec2, c_fp64vec2); y4_fp64vec2 = vec_div(x4_fp64vec2, c_fp64vec2); y5_fp64vec2 = vec_div(x5_fp64vec2, c_fp64vec2); y6_fp64vec2 = vec_div(x6_fp64vec2, c_fp64vec2); y7_fp64vec2 = vec_div(x7_fp64vec2, c_fp64vec2); y8_fp64vec2 = vec_div(x8_fp64vec2, c_fp64vec2); y9_fp64vec2 = vec_div(x9_fp64vec2, c_fp64vec2); y10_fp64vec2 = vec_div(x10_fp64vec2, c_fp64vec2); y11_fp64vec2 = vec_div(x11_fp64vec2, c_fp64vec2); vec_xst(y0_fp64vec2, 0, y+(i )); vec_xst(y1_fp64vec2, 0, y+(i+2 )); vec_xst(y2_fp64vec2, 0, y+(i+4 )); vec_xst(y3_fp64vec2, 0, y+(i+6 )); vec_xst(y4_fp64vec2, 0, y+(i+8 )); vec_xst(y5_fp64vec2, 0, y+(i+10)); vec_xst(y6_fp64vec2, 0, y+(i+12)); vec_xst(y7_fp64vec2, 0, y+(i+14)); vec_xst(y8_fp64vec2, 0, y+(i+16)); vec_xst(y9_fp64vec2, 0, y+(i+18)); vec_xst(y10_fp64vec2, 0, y+(i+20)); vec_xst(y11_fp64vec2, 0, y+(i+22)); } for (; i <= n-8; i += 8) { x0_fp64vec2 = vec_xl(0, x+(i )); x1_fp64vec2 = vec_xl(0, x+(i+2 )); x2_fp64vec2 = vec_xl(0, x+(i+4 )); x3_fp64vec2 = vec_xl(0, x+(i+6 )); y0_fp64vec2 = vec_div(x0_fp64vec2, c_fp64vec2); y1_fp64vec2 = vec_div(x1_fp64vec2, c_fp64vec2); y2_fp64vec2 = vec_div(x2_fp64vec2, c_fp64vec2); y3_fp64vec2 = vec_div(x3_fp64vec2, c_fp64vec2); vec_xst(y0_fp64vec2, 0, y+(i )); vec_xst(y1_fp64vec2, 0, y+(i+2 )); vec_xst(y2_fp64vec2, 0, y+(i+4 )); vec_xst(y3_fp64vec2, 0, y+(i+6 )); vec_xst(y0_fp64vec2, 0, y+(i )); vec_xst(y1_fp64vec2, 0, y+(i+2 )); vec_xst(y2_fp64vec2, 0, y+(i+4 )); vec_xst(y3_fp64vec2, 0, y+(i+6 )); } for (; i <= n-2; i += 2) { x0_fp64vec2 = vec_xl(0, x+(i )); y0_fp64vec2 = vec_div(x0_fp64vec2, c_fp64vec2); vec_xst(y0_fp64vec2, 0, y+(i )); } for (; i < n; i++) y[i] = x[i] / c; } //-------------------------------------------------------------------------------------------------- // THFloatVector_fill_VSX: //-------------------------------------------------------------------------------------------------- static void 
THFloatVector_fill_VSX(float *x, const float c, const ptrdiff_t n) { ptrdiff_t i; float val[4] = {c, c, c, c}; vector float fp32vec4 = vec_xl(0, val); for (i = 0; i <= n-256; i += 256) { vec_xst(fp32vec4, 0, x+(i )); vec_xst(fp32vec4, 0, x+(i+4 )); vec_xst(fp32vec4, 0, x+(i+8 )); vec_xst(fp32vec4, 0, x+(i+12 )); vec_xst(fp32vec4, 0, x+(i+16 )); vec_xst(fp32vec4, 0, x+(i+20 )); vec_xst(fp32vec4, 0, x+(i+24 )); vec_xst(fp32vec4, 0, x+(i+28 )); vec_xst(fp32vec4, 0, x+(i+32 )); vec_xst(fp32vec4, 0, x+(i+36 )); vec_xst(fp32vec4, 0, x+(i+40 )); vec_xst(fp32vec4, 0, x+(i+44 )); vec_xst(fp32vec4, 0, x+(i+48 )); vec_xst(fp32vec4, 0, x+(i+52 )); vec_xst(fp32vec4, 0, x+(i+56 )); vec_xst(fp32vec4, 0, x+(i+60 )); vec_xst(fp32vec4, 0, x+(i+64 )); vec_xst(fp32vec4, 0, x+(i+68 )); vec_xst(fp32vec4, 0, x+(i+72 )); vec_xst(fp32vec4, 0, x+(i+76 )); vec_xst(fp32vec4, 0, x+(i+80 )); vec_xst(fp32vec4, 0, x+(i+84 )); vec_xst(fp32vec4, 0, x+(i+88 )); vec_xst(fp32vec4, 0, x+(i+92 )); vec_xst(fp32vec4, 0, x+(i+96 )); vec_xst(fp32vec4, 0, x+(i+100)); vec_xst(fp32vec4, 0, x+(i+104)); vec_xst(fp32vec4, 0, x+(i+108)); vec_xst(fp32vec4, 0, x+(i+112)); vec_xst(fp32vec4, 0, x+(i+116)); vec_xst(fp32vec4, 0, x+(i+120)); vec_xst(fp32vec4, 0, x+(i+124)); vec_xst(fp32vec4, 0, x+(i+128)); vec_xst(fp32vec4, 0, x+(i+132)); vec_xst(fp32vec4, 0, x+(i+136)); vec_xst(fp32vec4, 0, x+(i+140)); vec_xst(fp32vec4, 0, x+(i+144)); vec_xst(fp32vec4, 0, x+(i+148)); vec_xst(fp32vec4, 0, x+(i+152)); vec_xst(fp32vec4, 0, x+(i+156)); vec_xst(fp32vec4, 0, x+(i+160)); vec_xst(fp32vec4, 0, x+(i+164)); vec_xst(fp32vec4, 0, x+(i+168)); vec_xst(fp32vec4, 0, x+(i+172)); vec_xst(fp32vec4, 0, x+(i+176)); vec_xst(fp32vec4, 0, x+(i+180)); vec_xst(fp32vec4, 0, x+(i+184)); vec_xst(fp32vec4, 0, x+(i+188)); vec_xst(fp32vec4, 0, x+(i+192)); vec_xst(fp32vec4, 0, x+(i+196)); vec_xst(fp32vec4, 0, x+(i+200)); vec_xst(fp32vec4, 0, x+(i+204)); vec_xst(fp32vec4, 0, x+(i+208)); vec_xst(fp32vec4, 0, x+(i+212)); vec_xst(fp32vec4, 0, x+(i+216)); 
vec_xst(fp32vec4, 0, x+(i+220)); vec_xst(fp32vec4, 0, x+(i+224)); vec_xst(fp32vec4, 0, x+(i+228)); vec_xst(fp32vec4, 0, x+(i+232)); vec_xst(fp32vec4, 0, x+(i+236)); vec_xst(fp32vec4, 0, x+(i+240)); vec_xst(fp32vec4, 0, x+(i+244)); vec_xst(fp32vec4, 0, x+(i+248)); vec_xst(fp32vec4, 0, x+(i+252)); } for (; i <= n-32; i += 32) { vec_xst(fp32vec4, 0, x+(i )); vec_xst(fp32vec4, 0, x+(i+4 )); vec_xst(fp32vec4, 0, x+(i+8 )); vec_xst(fp32vec4, 0, x+(i+12 )); vec_xst(fp32vec4, 0, x+(i+16 )); vec_xst(fp32vec4, 0, x+(i+20 )); vec_xst(fp32vec4, 0, x+(i+24 )); vec_xst(fp32vec4, 0, x+(i+28 )); } for (; i <= n-4; i += 4) vec_xst(fp32vec4, 0, x+(i )); for (; i < n; i++) x[i] = c; } //-------------------------------------------------------------------------------------------------- // THFloatVector_cadd_VSX: //-------------------------------------------------------------------------------------------------- static void THFloatVector_cadd_VSX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) { ptrdiff_t i; float val[4] = {c, c, c, c}; vector float c_fp32vec4 = vec_xl(0, val); vector float y0_fp32vec4, y1_fp32vec4, y2_fp32vec4, y3_fp32vec4, y4_fp32vec4, y5_fp32vec4, y6_fp32vec4, y7_fp32vec4; vector float y8_fp32vec4, y9_fp32vec4, y10_fp32vec4, y11_fp32vec4; vector float x0_fp32vec4, x1_fp32vec4, x2_fp32vec4, x3_fp32vec4, x4_fp32vec4, x5_fp32vec4, x6_fp32vec4, x7_fp32vec4; vector float x8_fp32vec4, x9_fp32vec4, x10_fp32vec4, x11_fp32vec4; for (i = 0; i <= n-48; i += 48) { y0_fp32vec4 = vec_xl(0, y+(i )); y1_fp32vec4 = vec_xl(0, y+(i+4 )); y2_fp32vec4 = vec_xl(0, y+(i+8 )); y3_fp32vec4 = vec_xl(0, y+(i+12)); y4_fp32vec4 = vec_xl(0, y+(i+16 )); y5_fp32vec4 = vec_xl(0, y+(i+20)); y6_fp32vec4 = vec_xl(0, y+(i+24)); y7_fp32vec4 = vec_xl(0, y+(i+28)); y8_fp32vec4 = vec_xl(0, y+(i+32)); y9_fp32vec4 = vec_xl(0, y+(i+36)); y10_fp32vec4 = vec_xl(0, y+(i+40)); y11_fp32vec4 = vec_xl(0, y+(i+44)); x0_fp32vec4 = vec_xl(0, x+(i )); x1_fp32vec4 = vec_xl(0, x+(i+4 )); 
x2_fp32vec4 = vec_xl(0, x+(i+8 )); x3_fp32vec4 = vec_xl(0, x+(i+12 )); x4_fp32vec4 = vec_xl(0, x+(i+16 )); x5_fp32vec4 = vec_xl(0, x+(i+20)); x6_fp32vec4 = vec_xl(0, x+(i+24)); x7_fp32vec4 = vec_xl(0, x+(i+28)); x8_fp32vec4 = vec_xl(0, x+(i+32)); x9_fp32vec4 = vec_xl(0, x+(i+36)); x10_fp32vec4 = vec_xl(0, x+(i+40)); x11_fp32vec4 = vec_xl(0, x+(i+44)); y0_fp32vec4 = vec_madd(y0_fp32vec4, c_fp32vec4, x0_fp32vec4); y1_fp32vec4 = vec_madd(y1_fp32vec4, c_fp32vec4, x1_fp32vec4); y2_fp32vec4 = vec_madd(y2_fp32vec4, c_fp32vec4, x2_fp32vec4); y3_fp32vec4 = vec_madd(y3_fp32vec4, c_fp32vec4, x3_fp32vec4); y4_fp32vec4 = vec_madd(y4_fp32vec4, c_fp32vec4, x4_fp32vec4); y5_fp32vec4 = vec_madd(y5_fp32vec4, c_fp32vec4, x5_fp32vec4); y6_fp32vec4 = vec_madd(y6_fp32vec4, c_fp32vec4, x6_fp32vec4); y7_fp32vec4 = vec_madd(y7_fp32vec4, c_fp32vec4, x7_fp32vec4); y8_fp32vec4 = vec_madd(y8_fp32vec4, c_fp32vec4, x8_fp32vec4); y9_fp32vec4 = vec_madd(y9_fp32vec4, c_fp32vec4, x9_fp32vec4); y10_fp32vec4 = vec_madd(y10_fp32vec4, c_fp32vec4, x10_fp32vec4); y11_fp32vec4 = vec_madd(y11_fp32vec4, c_fp32vec4, x11_fp32vec4); vec_xst(y0_fp32vec4, 0, z+(i )); vec_xst(y1_fp32vec4, 0, z+(i+4 )); vec_xst(y2_fp32vec4, 0, z+(i+8 )); vec_xst(y3_fp32vec4, 0, z+(i+12 )); vec_xst(y4_fp32vec4, 0, z+(i+16 )); vec_xst(y5_fp32vec4, 0, z+(i+20)); vec_xst(y6_fp32vec4, 0, z+(i+24)); vec_xst(y7_fp32vec4, 0, z+(i+28)); vec_xst(y8_fp32vec4, 0, z+(i+32)); vec_xst(y9_fp32vec4, 0, z+(i+36)); vec_xst(y10_fp32vec4, 0, z+(i+40)); vec_xst(y11_fp32vec4, 0, z+(i+44)); } for (; i <= n-16; i += 16) { y0_fp32vec4 = vec_xl(0, y+(i )); y1_fp32vec4 = vec_xl(0, y+(i+4 )); y2_fp32vec4 = vec_xl(0, y+(i+8 )); y3_fp32vec4 = vec_xl(0, y+(i+12 )); x0_fp32vec4 = vec_xl(0, x+(i )); x1_fp32vec4 = vec_xl(0, x+(i+4 )); x2_fp32vec4 = vec_xl(0, x+(i+8 )); x3_fp32vec4 = vec_xl(0, x+(i+12 )); y0_fp32vec4 = vec_madd(y0_fp32vec4, c_fp32vec4, x0_fp32vec4); y1_fp32vec4 = vec_madd(y1_fp32vec4, c_fp32vec4, x1_fp32vec4); y2_fp32vec4 = vec_madd(y2_fp32vec4, 
c_fp32vec4, x2_fp32vec4); y3_fp32vec4 = vec_madd(y3_fp32vec4, c_fp32vec4, x3_fp32vec4); vec_xst(y0_fp32vec4, 0, z+(i )); vec_xst(y1_fp32vec4, 0, z+(i+4 )); vec_xst(y2_fp32vec4, 0, z+(i+8 )); vec_xst(y3_fp32vec4, 0, z+(i+12 )); } for (; i <= n-4; i += 4) { y0_fp32vec4 = vec_xl(0, y+(i )); x0_fp32vec4 = vec_xl(0, x+(i )); y0_fp32vec4 = vec_madd(y0_fp32vec4, c_fp32vec4, x0_fp32vec4); vec_xst(y0_fp32vec4, 0, z+(i )); } for (; i < n; i++) z[i] = x[i] + c* y[i]; } //-------------------------------------------------------------------------------------------------- // THFloatVector_adds_VSX: //-------------------------------------------------------------------------------------------------- static void THFloatVector_adds_VSX(float *y, const float *x, const float c, const ptrdiff_t n) { ptrdiff_t i; float val[4] = {c, c, c, c}; vector float c_fp32vec4 = vec_xl(0, val); vector float y0_fp32vec4, y1_fp32vec4, y2_fp32vec4, y3_fp32vec4, y4_fp32vec4, y5_fp32vec4, y6_fp32vec4, y7_fp32vec4; vector float y8_fp32vec4, y9_fp32vec4, y10_fp32vec4, y11_fp32vec4; vector float x0_fp32vec4, x1_fp32vec4, x2_fp32vec4, x3_fp32vec4, x4_fp32vec4, x5_fp32vec4, x6_fp32vec4, x7_fp32vec4; vector float x8_fp32vec4, x9_fp32vec4, x10_fp32vec4, x11_fp32vec4; for (i = 0; i <= n-48; i += 48) { x0_fp32vec4 = vec_xl(0, x+(i )); x1_fp32vec4 = vec_xl(0, x+(i+4 )); x2_fp32vec4 = vec_xl(0, x+(i+8 )); x3_fp32vec4 = vec_xl(0, x+(i+12)); x4_fp32vec4 = vec_xl(0, x+(i+16)); x5_fp32vec4 = vec_xl(0, x+(i+20)); x6_fp32vec4 = vec_xl(0, x+(i+24)); x7_fp32vec4 = vec_xl(0, x+(i+28)); x8_fp32vec4 = vec_xl(0, x+(i+32)); x9_fp32vec4 = vec_xl(0, x+(i+36)); x10_fp32vec4 = vec_xl(0, x+(i+40)); x11_fp32vec4 = vec_xl(0, x+(i+44)); y0_fp32vec4 = vec_add(x0_fp32vec4, c_fp32vec4); y1_fp32vec4 = vec_add(x1_fp32vec4, c_fp32vec4); y2_fp32vec4 = vec_add(x2_fp32vec4, c_fp32vec4); y3_fp32vec4 = vec_add(x3_fp32vec4, c_fp32vec4); y4_fp32vec4 = vec_add(x4_fp32vec4, c_fp32vec4); y5_fp32vec4 = vec_add(x5_fp32vec4, c_fp32vec4); y6_fp32vec4 = 
vec_add(x6_fp32vec4, c_fp32vec4); y7_fp32vec4 = vec_add(x7_fp32vec4, c_fp32vec4); y8_fp32vec4 = vec_add(x8_fp32vec4, c_fp32vec4); y9_fp32vec4 = vec_add(x9_fp32vec4, c_fp32vec4); y10_fp32vec4 = vec_add(x10_fp32vec4, c_fp32vec4); y11_fp32vec4 = vec_add(x11_fp32vec4, c_fp32vec4); vec_xst(y0_fp32vec4, 0, y+(i )); vec_xst(y1_fp32vec4, 0, y+(i+4 )); vec_xst(y2_fp32vec4, 0, y+(i+8 )); vec_xst(y3_fp32vec4, 0, y+(i+12)); vec_xst(y4_fp32vec4, 0, y+(i+16)); vec_xst(y5_fp32vec4, 0, y+(i+20)); vec_xst(y6_fp32vec4, 0, y+(i+24)); vec_xst(y7_fp32vec4, 0, y+(i+28)); vec_xst(y8_fp32vec4, 0, y+(i+32)); vec_xst(y9_fp32vec4, 0, y+(i+36)); vec_xst(y10_fp32vec4, 0, y+(i+40)); vec_xst(y11_fp32vec4, 0, y+(i+44)); } for (; i <= n-16; i += 16) { x0_fp32vec4 = vec_xl(0, x+(i )); x1_fp32vec4 = vec_xl(0, x+(i+4 )); x2_fp32vec4 = vec_xl(0, x+(i+8 )); x3_fp32vec4 = vec_xl(0, x+(i+12)); y0_fp32vec4 = vec_add(x0_fp32vec4, c_fp32vec4); y1_fp32vec4 = vec_add(x1_fp32vec4, c_fp32vec4); y2_fp32vec4 = vec_add(x2_fp32vec4, c_fp32vec4); y3_fp32vec4 = vec_add(x3_fp32vec4, c_fp32vec4); vec_xst(y0_fp32vec4, 0, y+(i )); vec_xst(y1_fp32vec4, 0, y+(i+4 )); vec_xst(y2_fp32vec4, 0, y+(i+8 )); vec_xst(y3_fp32vec4, 0, y+(i+12)); } for (; i <= n-4; i += 4) { x0_fp32vec4 = vec_xl(0, x+(i )); y0_fp32vec4 = vec_add(x0_fp32vec4, c_fp32vec4); vec_xst(y0_fp32vec4, 0, y+(i )); } for (; i < n; i++) y[i] = c + x[i]; } //-------------------------------------------------------------------------------------------------- // THFloatVector_cmul_VSX: //-------------------------------------------------------------------------------------------------- static void THFloatVector_cmul_VSX(float *z, const float *y, const float *x, const ptrdiff_t n) { ptrdiff_t i; vector float y0_fp32vec4, y1_fp32vec4, y2_fp32vec4, y3_fp32vec4, y4_fp32vec4, y5_fp32vec4, y6_fp32vec4, y7_fp32vec4; vector float y8_fp32vec4, y9_fp32vec4, y10_fp32vec4, y11_fp32vec4; vector float x0_fp32vec4, x1_fp32vec4, x2_fp32vec4, x3_fp32vec4, x4_fp32vec4, x5_fp32vec4, 
x6_fp32vec4, x7_fp32vec4; vector float x8_fp32vec4, x9_fp32vec4, x10_fp32vec4, x11_fp32vec4; for (i = 0; i <= n-48; i += 48) { y0_fp32vec4 = vec_xl(0, y+(i )); y1_fp32vec4 = vec_xl(0, y+(i+4 )); y2_fp32vec4 = vec_xl(0, y+(i+8 )); y3_fp32vec4 = vec_xl(0, y+(i+12 )); y4_fp32vec4 = vec_xl(0, y+(i+16 )); y5_fp32vec4 = vec_xl(0, y+(i+20)); y6_fp32vec4 = vec_xl(0, y+(i+24)); y7_fp32vec4 = vec_xl(0, y+(i+28)); y8_fp32vec4 = vec_xl(0, y+(i+32)); y9_fp32vec4 = vec_xl(0, y+(i+36)); y10_fp32vec4 = vec_xl(0, y+(i+40)); y11_fp32vec4 = vec_xl(0, y+(i+44)); x0_fp32vec4 = vec_xl(0, x+(i )); x1_fp32vec4 = vec_xl(0, x+(i+4 )); x2_fp32vec4 = vec_xl(0, x+(i+8 )); x3_fp32vec4 = vec_xl(0, x+(i+12 )); x4_fp32vec4 = vec_xl(0, x+(i+16 )); x5_fp32vec4 = vec_xl(0, x+(i+20)); x6_fp32vec4 = vec_xl(0, x+(i+24)); x7_fp32vec4 = vec_xl(0, x+(i+28)); x8_fp32vec4 = vec_xl(0, x+(i+32)); x9_fp32vec4 = vec_xl(0, x+(i+36)); x10_fp32vec4 = vec_xl(0, x+(i+40)); x11_fp32vec4 = vec_xl(0, x+(i+44)); y0_fp32vec4 = vec_mul(y0_fp32vec4, x0_fp32vec4); y1_fp32vec4 = vec_mul(y1_fp32vec4, x1_fp32vec4); y2_fp32vec4 = vec_mul(y2_fp32vec4, x2_fp32vec4); y3_fp32vec4 = vec_mul(y3_fp32vec4, x3_fp32vec4); y4_fp32vec4 = vec_mul(y4_fp32vec4, x4_fp32vec4); y5_fp32vec4 = vec_mul(y5_fp32vec4, x5_fp32vec4); y6_fp32vec4 = vec_mul(y6_fp32vec4, x6_fp32vec4); y7_fp32vec4 = vec_mul(y7_fp32vec4, x7_fp32vec4); y8_fp32vec4 = vec_mul(y8_fp32vec4, x8_fp32vec4); y9_fp32vec4 = vec_mul(y9_fp32vec4, x9_fp32vec4); y10_fp32vec4 = vec_mul(y10_fp32vec4, x10_fp32vec4); y11_fp32vec4 = vec_mul(y11_fp32vec4, x11_fp32vec4); vec_xst(y0_fp32vec4, 0, z+(i )); vec_xst(y1_fp32vec4, 0, z+(i+4 )); vec_xst(y2_fp32vec4, 0, z+(i+8 )); vec_xst(y3_fp32vec4, 0, z+(i+12 )); vec_xst(y4_fp32vec4, 0, z+(i+16 )); vec_xst(y5_fp32vec4, 0, z+(i+20)); vec_xst(y6_fp32vec4, 0, z+(i+24)); vec_xst(y7_fp32vec4, 0, z+(i+28)); vec_xst(y8_fp32vec4, 0, z+(i+32)); vec_xst(y9_fp32vec4, 0, z+(i+36)); vec_xst(y10_fp32vec4, 0, z+(i+40)); vec_xst(y11_fp32vec4, 0, z+(i+44)); } for (; i 
<= n-16; i += 16) { y0_fp32vec4 = vec_xl(0, y+(i )); y1_fp32vec4 = vec_xl(0, y+(i+4 )); y2_fp32vec4 = vec_xl(0, y+(i+8 )); y3_fp32vec4 = vec_xl(0, y+(i+12 )); x0_fp32vec4 = vec_xl(0, x+(i )); x1_fp32vec4 = vec_xl(0, x+(i+4 )); x2_fp32vec4 = vec_xl(0, x+(i+8 )); x3_fp32vec4 = vec_xl(0, x+(i+12 )); y0_fp32vec4 = vec_mul(y0_fp32vec4, x0_fp32vec4); y1_fp32vec4 = vec_mul(y1_fp32vec4, x1_fp32vec4); y2_fp32vec4 = vec_mul(y2_fp32vec4, x2_fp32vec4); y3_fp32vec4 = vec_mul(y3_fp32vec4, x3_fp32vec4); vec_xst(y0_fp32vec4, 0, z+(i )); vec_xst(y1_fp32vec4, 0, z+(i+4 )); vec_xst(y2_fp32vec4, 0, z+(i+8 )); vec_xst(y3_fp32vec4, 0, z+(i+12 )); } for (; i <= n-4; i += 4) { y0_fp32vec4 = vec_xl(0, y+(i )); x0_fp32vec4 = vec_xl(0, x+(i )); y0_fp32vec4 = vec_mul(y0_fp32vec4, x0_fp32vec4); vec_xst(y0_fp32vec4, 0, z+(i )); } for (; i < n; i++) z[i] = y[i] * x[i]; } //-------------------------------------------------------------------------------------------------- // THFloatVector_muls_VSX: //-------------------------------------------------------------------------------------------------- static void THFloatVector_muls_VSX(float *y, const float *x, const float c, const ptrdiff_t n) { ptrdiff_t i; float val[4] = {c, c, c, c}; vector float c_fp32vec4 = vec_xl(0, val); vector float y0_fp32vec4, y1_fp32vec4, y2_fp32vec4, y3_fp32vec4, y4_fp32vec4, y5_fp32vec4, y6_fp32vec4, y7_fp32vec4; vector float y8_fp32vec4, y9_fp32vec4, y10_fp32vec4, y11_fp32vec4; vector float x0_fp32vec4, x1_fp32vec4, x2_fp32vec4, x3_fp32vec4, x4_fp32vec4, x5_fp32vec4, x6_fp32vec4, x7_fp32vec4; vector float x8_fp32vec4, x9_fp32vec4, x10_fp32vec4, x11_fp32vec4; for (i = 0; i <= n-48; i += 48) { x0_fp32vec4 = vec_xl(0, x+(i )); x1_fp32vec4 = vec_xl(0, x+(i+4 )); x2_fp32vec4 = vec_xl(0, x+(i+8 )); x3_fp32vec4 = vec_xl(0, x+(i+12)); x4_fp32vec4 = vec_xl(0, x+(i+16)); x5_fp32vec4 = vec_xl(0, x+(i+20)); x6_fp32vec4 = vec_xl(0, x+(i+24)); x7_fp32vec4 = vec_xl(0, x+(i+28)); x8_fp32vec4 = vec_xl(0, x+(i+32)); x9_fp32vec4 = 
vec_xl(0, x+(i+36)); x10_fp32vec4 = vec_xl(0, x+(i+40)); x11_fp32vec4 = vec_xl(0, x+(i+44)); y0_fp32vec4 = vec_mul(x0_fp32vec4, c_fp32vec4); y1_fp32vec4 = vec_mul(x1_fp32vec4, c_fp32vec4); y2_fp32vec4 = vec_mul(x2_fp32vec4, c_fp32vec4); y3_fp32vec4 = vec_mul(x3_fp32vec4, c_fp32vec4); y4_fp32vec4 = vec_mul(x4_fp32vec4, c_fp32vec4); y5_fp32vec4 = vec_mul(x5_fp32vec4, c_fp32vec4); y6_fp32vec4 = vec_mul(x6_fp32vec4, c_fp32vec4); y7_fp32vec4 = vec_mul(x7_fp32vec4, c_fp32vec4); y8_fp32vec4 = vec_mul(x8_fp32vec4, c_fp32vec4); y9_fp32vec4 = vec_mul(x9_fp32vec4, c_fp32vec4); y10_fp32vec4 = vec_mul(x10_fp32vec4, c_fp32vec4); y11_fp32vec4 = vec_mul(x11_fp32vec4, c_fp32vec4); vec_xst(y0_fp32vec4, 0, y+(i )); vec_xst(y1_fp32vec4, 0, y+(i+4 )); vec_xst(y2_fp32vec4, 0, y+(i+8 )); vec_xst(y3_fp32vec4, 0, y+(i+12)); vec_xst(y4_fp32vec4, 0, y+(i+16)); vec_xst(y5_fp32vec4, 0, y+(i+20)); vec_xst(y6_fp32vec4, 0, y+(i+24)); vec_xst(y7_fp32vec4, 0, y+(i+28)); vec_xst(y8_fp32vec4, 0, y+(i+32)); vec_xst(y9_fp32vec4, 0, y+(i+36)); vec_xst(y10_fp32vec4, 0, y+(i+40)); vec_xst(y11_fp32vec4, 0, y+(i+44)); } for (; i <= n-16; i += 16) { x0_fp32vec4 = vec_xl(0, x+(i )); x1_fp32vec4 = vec_xl(0, x+(i+4 )); x2_fp32vec4 = vec_xl(0, x+(i+8 )); x3_fp32vec4 = vec_xl(0, x+(i+12)); y0_fp32vec4 = vec_mul(x0_fp32vec4, c_fp32vec4); y1_fp32vec4 = vec_mul(x1_fp32vec4, c_fp32vec4); y2_fp32vec4 = vec_mul(x2_fp32vec4, c_fp32vec4); y3_fp32vec4 = vec_mul(x3_fp32vec4, c_fp32vec4); vec_xst(y0_fp32vec4, 0, y+(i )); vec_xst(y1_fp32vec4, 0, y+(i+4 )); vec_xst(y2_fp32vec4, 0, y+(i+8 )); vec_xst(y3_fp32vec4, 0, y+(i+12)); } for (; i <= n-4; i += 4) { x0_fp32vec4 = vec_xl(0, x+(i )); y0_fp32vec4 = vec_mul(x0_fp32vec4, c_fp32vec4); vec_xst(y0_fp32vec4, 0, y+(i )); } for (; i < n; i++) y[i] = c * x[i]; } //-------------------------------------------------------------------------------------------------- // THFloatVector_cdiv_VSX: 
//-------------------------------------------------------------------------------------------------- static void THFloatVector_cdiv_VSX(float *z, const float *x, const float *y, const ptrdiff_t n) { ptrdiff_t i; vector float y0_fp32vec4, y1_fp32vec4, y2_fp32vec4, y3_fp32vec4, y4_fp32vec4, y5_fp32vec4, y6_fp32vec4, y7_fp32vec4; vector float y8_fp32vec4, y9_fp32vec4, y10_fp32vec4, y11_fp32vec4; vector float x0_fp32vec4, x1_fp32vec4, x2_fp32vec4, x3_fp32vec4, x4_fp32vec4, x5_fp32vec4, x6_fp32vec4, x7_fp32vec4; vector float x8_fp32vec4, x9_fp32vec4, x10_fp32vec4, x11_fp32vec4; for (i = 0; i <= n-48; i += 48) { y0_fp32vec4 = vec_xl(0, y+(i )); y1_fp32vec4 = vec_xl(0, y+(i+4)); y2_fp32vec4 = vec_xl(0, y+(i+8)); y3_fp32vec4 = vec_xl(0, y+(i+12)); y4_fp32vec4 = vec_xl(0, y+(i+16)); y5_fp32vec4 = vec_xl(0, y+(i+20)); y6_fp32vec4 = vec_xl(0, y+(i+24)); y7_fp32vec4 = vec_xl(0, y+(i+28)); y8_fp32vec4 = vec_xl(0, y+(i+32)); y9_fp32vec4 = vec_xl(0, y+(i+36)); y10_fp32vec4 = vec_xl(0, y+(i+40)); y11_fp32vec4 = vec_xl(0, y+(i+44)); x0_fp32vec4 = vec_xl(0, x+(i )); x1_fp32vec4 = vec_xl(0, x+(i+4 )); x2_fp32vec4 = vec_xl(0, x+(i+8 )); x3_fp32vec4 = vec_xl(0, x+(i+12 )); x4_fp32vec4 = vec_xl(0, x+(i+16 )); x5_fp32vec4 = vec_xl(0, x+(i+20)); x6_fp32vec4 = vec_xl(0, x+(i+24)); x7_fp32vec4 = vec_xl(0, x+(i+28)); x8_fp32vec4 = vec_xl(0, x+(i+32)); x9_fp32vec4 = vec_xl(0, x+(i+36)); x10_fp32vec4 = vec_xl(0, x+(i+40)); x11_fp32vec4 = vec_xl(0, x+(i+44)); y0_fp32vec4 = vec_div(x0_fp32vec4, y0_fp32vec4); y1_fp32vec4 = vec_div(x1_fp32vec4, y1_fp32vec4); y2_fp32vec4 = vec_div(x2_fp32vec4, y2_fp32vec4); y3_fp32vec4 = vec_div(x3_fp32vec4, y3_fp32vec4); y4_fp32vec4 = vec_div(x4_fp32vec4, y4_fp32vec4); y5_fp32vec4 = vec_div(x5_fp32vec4, y5_fp32vec4); y6_fp32vec4 = vec_div(x6_fp32vec4, y6_fp32vec4); y7_fp32vec4 = vec_div(x7_fp32vec4, y7_fp32vec4); y8_fp32vec4 = vec_div(x8_fp32vec4, y8_fp32vec4); y9_fp32vec4 = vec_div(x9_fp32vec4, y9_fp32vec4); y10_fp32vec4 = vec_div(x10_fp32vec4, y10_fp32vec4); 
y11_fp32vec4 = vec_div(x11_fp32vec4, y11_fp32vec4); vec_xst(y0_fp32vec4, 0, z+(i )); vec_xst(y1_fp32vec4, 0, z+(i+4 )); vec_xst(y2_fp32vec4, 0, z+(i+8 )); vec_xst(y3_fp32vec4, 0, z+(i+12 )); vec_xst(y4_fp32vec4, 0, z+(i+16 )); vec_xst(y5_fp32vec4, 0, z+(i+20)); vec_xst(y6_fp32vec4, 0, z+(i+24)); vec_xst(y7_fp32vec4, 0, z+(i+28)); vec_xst(y8_fp32vec4, 0, z+(i+32)); vec_xst(y9_fp32vec4, 0, z+(i+36)); vec_xst(y10_fp32vec4, 0, z+(i+40)); vec_xst(y11_fp32vec4, 0, z+(i+44)); } for (; i <= n-16; i += 16) { y0_fp32vec4 = vec_xl(0, y+(i )); y1_fp32vec4 = vec_xl(0, y+(i+4 )); y2_fp32vec4 = vec_xl(0, y+(i+8 )); y3_fp32vec4 = vec_xl(0, y+(i+12 )); x0_fp32vec4 = vec_xl(0, x+(i )); x1_fp32vec4 = vec_xl(0, x+(i+4 )); x2_fp32vec4 = vec_xl(0, x+(i+8 )); x3_fp32vec4 = vec_xl(0, x+(i+12 )); y0_fp32vec4 = vec_div(x0_fp32vec4, y0_fp32vec4); y1_fp32vec4 = vec_div(x1_fp32vec4, y1_fp32vec4); y2_fp32vec4 = vec_div(x2_fp32vec4, y2_fp32vec4); y3_fp32vec4 = vec_div(x3_fp32vec4, y3_fp32vec4); vec_xst(y0_fp32vec4, 0, z+(i )); vec_xst(y1_fp32vec4, 0, z+(i+4 )); vec_xst(y2_fp32vec4, 0, z+(i+8 )); vec_xst(y3_fp32vec4, 0, z+(i+12 )); } for (; i <= n-4; i += 4) { y0_fp32vec4 = vec_xl(0, y+(i )); x0_fp32vec4 = vec_xl(0, x+(i )); y0_fp32vec4 = vec_div(x0_fp32vec4, y0_fp32vec4); vec_xst(y0_fp32vec4, 0, z+(i )); } for (; i < n; i++) z[i] = x[i] / y[i]; } //-------------------------------------------------------------------------------------------------- // THFloatVector_divs_VSX: //-------------------------------------------------------------------------------------------------- static void THFloatVector_divs_VSX(float *y, const float*x, const float c, const ptrdiff_t n) { ptrdiff_t i; float val[4] = {c, c, c, c}; vector float c_fp64vec2 = vec_xl(0, val); vector float y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2; vector float y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2; vector float x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, 
x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2; vector float x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2; for (i = 0; i <= n-48; i += 48) { x0_fp64vec2 = vec_xl(0, x+(i )); x1_fp64vec2 = vec_xl(0, x+(i+4 )); x2_fp64vec2 = vec_xl(0, x+(i+8 )); x3_fp64vec2 = vec_xl(0, x+(i+12 )); x4_fp64vec2 = vec_xl(0, x+(i+16 )); x5_fp64vec2 = vec_xl(0, x+(i+20)); x6_fp64vec2 = vec_xl(0, x+(i+24)); x7_fp64vec2 = vec_xl(0, x+(i+28)); x8_fp64vec2 = vec_xl(0, x+(i+32)); x9_fp64vec2 = vec_xl(0, x+(i+36)); x10_fp64vec2 = vec_xl(0, x+(i+40)); x11_fp64vec2 = vec_xl(0, x+(i+44)); y0_fp64vec2 = vec_div(x0_fp64vec2, c_fp64vec2); y1_fp64vec2 = vec_div(x1_fp64vec2, c_fp64vec2); y2_fp64vec2 = vec_div(x2_fp64vec2, c_fp64vec2); y3_fp64vec2 = vec_div(x3_fp64vec2, c_fp64vec2); y4_fp64vec2 = vec_div(x4_fp64vec2, c_fp64vec2); y5_fp64vec2 = vec_div(x5_fp64vec2, c_fp64vec2); y6_fp64vec2 = vec_div(x6_fp64vec2, c_fp64vec2); y7_fp64vec2 = vec_div(x7_fp64vec2, c_fp64vec2); y8_fp64vec2 = vec_div(x8_fp64vec2, c_fp64vec2); y9_fp64vec2 = vec_div(x9_fp64vec2, c_fp64vec2); y10_fp64vec2 = vec_div(x10_fp64vec2, c_fp64vec2); y11_fp64vec2 = vec_div(x11_fp64vec2, c_fp64vec2); vec_xst(y0_fp64vec2, 0, y+(i )); vec_xst(y1_fp64vec2, 0, y+(i+4 )); vec_xst(y2_fp64vec2, 0, y+(i+8 )); vec_xst(y3_fp64vec2, 0, y+(i+12 )); vec_xst(y4_fp64vec2, 0, y+(i+16 )); vec_xst(y5_fp64vec2, 0, y+(i+20)); vec_xst(y6_fp64vec2, 0, y+(i+24)); vec_xst(y7_fp64vec2, 0, y+(i+28)); vec_xst(y8_fp64vec2, 0, y+(i+32)); vec_xst(y9_fp64vec2, 0, y+(i+36)); vec_xst(y10_fp64vec2, 0, y+(i+40)); vec_xst(y11_fp64vec2, 0, y+(i+44)); } for (; i <= n-16; i += 16) { x0_fp64vec2 = vec_xl(0, x+(i )); x1_fp64vec2 = vec_xl(0, x+(i+4 )); x2_fp64vec2 = vec_xl(0, x+(i+8 )); x3_fp64vec2 = vec_xl(0, x+(i+12 )); y0_fp64vec2 = vec_div(x0_fp64vec2, c_fp64vec2); y1_fp64vec2 = vec_div(x1_fp64vec2, c_fp64vec2); y2_fp64vec2 = vec_div(x2_fp64vec2, c_fp64vec2); y3_fp64vec2 = vec_div(x3_fp64vec2, c_fp64vec2); vec_xst(y0_fp64vec2, 0, y+(i )); vec_xst(y1_fp64vec2, 0, 
y+(i+4 )); vec_xst(y2_fp64vec2, 0, y+(i+8 )); vec_xst(y3_fp64vec2, 0, y+(i+12 )); vec_xst(y0_fp64vec2, 0, y+(i )); vec_xst(y1_fp64vec2, 0, y+(i+4 )); vec_xst(y2_fp64vec2, 0, y+(i+8 )); vec_xst(y3_fp64vec2, 0, y+(i+16 )); } for (; i <= n-4; i += 4) { x0_fp64vec2 = vec_xl(0, x+(i )); y0_fp64vec2 = vec_div(x0_fp64vec2, c_fp64vec2); vec_xst(y0_fp64vec2, 0, y+(i )); } for (; i < n; i++) y[i] = x[i] / c; } //------------------------------------------------ // // Testing for correctness and performance // // If you want to run these tests, compile this // file with -DRUN_VSX_TESTS on a Power machine, // and then run the executable that is generated. // //------------------------------------------------ // // Example passing run (from a Power8 machine): // // $ gcc VSX.c -O2 -D RUN_VSX_TESTS -o vsxtest // $ ./vsxtest // // TODO // // // Finished running all tests. All tests PASSED. // //------------------------------------------------ #ifdef RUN_VSX_TESTS #include #include #include #include #include #define VSX_PERF_NUM_TEST_ELEMENTS 100000000 #define VSX_FUNC_NUM_TEST_ELEMENTS 2507 //-------------------------------------------------------------------------------------------------- // Standard implementations: //-------------------------------------------------------------------------------------------------- static void standardDouble_fill(double *x, const double c, const ptrdiff_t n) { for (ptrdiff_t i = 0; i < n; i++) x[i] = c; } static void standardFloat_fill(float *x, const float c, const ptrdiff_t n) { for (ptrdiff_t i = 0; i < n; i++) x[i] = c; } static void standardDouble_cadd(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) { for (ptrdiff_t i = 0; i < n; i++) z[i] = x[i] + c * y[i]; } static void standardFloat_cadd(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) { for (ptrdiff_t i = 0; i < n; i++) z[i] = x[i] + c * y[i]; } static void standardDouble_adds(double *y, const double *x, const double c, const 
ptrdiff_t n)
{
    for (ptrdiff_t i = 0; i < n; i++)
        y[i] = c + x[i];
}

// Scalar reference: y[i] = c + x[i] (float).
static void standardFloat_adds(float *y, const float *x, const float c, const ptrdiff_t n)
{
    for (ptrdiff_t i = 0; i < n; i++)
        y[i] = c + x[i];
}

// Scalar reference: z[i] = x[i] * y[i] (double).
static void standardDouble_cmul(double *z, const double *x, const double *y, const ptrdiff_t n)
{
    for (ptrdiff_t i = 0; i < n; i++)
        z[i] = x[i] * y[i];
}

// Scalar reference: z[i] = x[i] * y[i] (float).
static void standardFloat_cmul(float *z, const float *x, const float *y, const ptrdiff_t n)
{
    for (ptrdiff_t i = 0; i < n; i++)
        z[i] = x[i] * y[i];
}

// Scalar reference: y[i] = c * x[i] (double).
static void standardDouble_muls(double *y, const double *x, const double c, const ptrdiff_t n)
{
    for (ptrdiff_t i = 0; i < n; i++)
        y[i] = c * x[i];
}

// Scalar reference: y[i] = c * x[i] (float).
static void standardFloat_muls(float *y, const float *x, const float c, const ptrdiff_t n)
{
    for (ptrdiff_t i = 0; i < n; i++)
        y[i] = c * x[i];
}

// Scalar reference: z[i] = x[i] / y[i] (double).
static void standardDouble_cdiv(double *z, const double *x, const double *y, const ptrdiff_t n)
{
    for (ptrdiff_t i = 0; i < n; i++)
        z[i] = x[i] / y[i];
}

// Scalar reference: z[i] = x[i] / y[i] (float).
static void standardFloat_cdiv(float *z, const float *x, const float *y, const ptrdiff_t n)
{
    for (ptrdiff_t i = 0; i < n; i++)
        z[i] = x[i] / y[i];
}

// Scalar reference: y[i] = x[i] / c (double).
static void standardDouble_divs(double *y, const double *x, const double c, const ptrdiff_t n)
{
    for (ptrdiff_t i = 0; i < n; i++)
        y[i] = x[i] / c;
}

// Scalar reference: y[i] = x[i] / c (float).
static void standardFloat_divs(float *y, const float *x, const float c, const ptrdiff_t n)
{
    for (ptrdiff_t i = 0; i < n; i++)
        y[i] = x[i] / c;
}

// Random double of either sign.  NOTE(review): both rand()%100 operands can be
// zero, so this can yield inf or NaN; near() below tolerates those classes.
double randDouble()
{
    return (double)(rand()%100)/(double)(rand()%100) * (rand()%2 ? -1.0 : 1.0);
}

// Approximate equality that first compares floating-point classes.
int near(double a, double b)
{
    int aClass = fpclassify(a);
    int bClass = fpclassify(b);
    if(aClass != bClass)             // i.e. is it NAN, infinite, or finite...?
        return 0;
    if(aClass == FP_INFINITE)        // if it is infinite, the sign must be the same, i.e. positive infinity is not near negative infinity
        return (signbit(a) == signbit(b));
    else if(aClass == FP_NORMAL)     // if it is a normal number then check the magnitude of the difference between the numbers
        return fabs(a - b) < 0.001;
    else                             // if both number are of the same class as each other and are of any other class (i.e. such as NAN), then they are near to each other.
        return 1;
}


//--------------------------------------------------------------------------------------------------
// Standard tests:
//--------------------------------------------------------------------------------------------------
// Benchmark THDoubleVector_fill_VSX against the scalar reference, then verify
// it on assorted offsets/lengths (exercising unaligned heads and tails).
void test_THDoubleVector_fill_VSX()
{
    clock_t start, end;
    double elapsedSeconds_optimized, elapsedSeconds_standard;

    double *x_standard  = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
    double *x_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));

    double yVal0 = 17.2;
    double yVal1 = 8.2;
    double yVal2 = 5.1;
    double yVal3 = -0.9;

    //-------------------------------------------------
    // Performance Test
    //-------------------------------------------------
    start = clock();
    standardDouble_fill(x_standard, yVal0, VSX_PERF_NUM_TEST_ELEMENTS  );
    standardDouble_fill(x_standard, yVal1, VSX_PERF_NUM_TEST_ELEMENTS-1);
    standardDouble_fill(x_standard, yVal2, VSX_PERF_NUM_TEST_ELEMENTS-2);
    standardDouble_fill(x_standard, yVal3, VSX_PERF_NUM_TEST_ELEMENTS-3);
    end = clock();
    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;
    printf("standardDouble_fill() test took %.5lf seconds\n", elapsedSeconds_standard);

    start = clock();
    THDoubleVector_fill_VSX(x_optimized, yVal0, VSX_PERF_NUM_TEST_ELEMENTS  );
    THDoubleVector_fill_VSX(x_optimized, yVal1, VSX_PERF_NUM_TEST_ELEMENTS-1);
    THDoubleVector_fill_VSX(x_optimized, yVal2, VSX_PERF_NUM_TEST_ELEMENTS-2);
    THDoubleVector_fill_VSX(x_optimized, yVal3, VSX_PERF_NUM_TEST_ELEMENTS-3);
    end = clock();
    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;
    printf("THDoubleVector_fill_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized);

    //-------------------------------------------------
    // Correctness Test
    //-------------------------------------------------
    // Fresh fill values so the perf-test contents can't mask a no-op.
    yVal0 += 1.0;
    yVal1 += 1.0;
    yVal2 += 1.0;
    yVal3 -= 1.0;

    standardDouble_fill(    x_standard,  yVal0, VSX_FUNC_NUM_TEST_ELEMENTS);
    THDoubleVector_fill_VSX(x_optimized, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS);
    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)
        assert(x_optimized[i] == yVal0);
    // Offset starts force the vector routine through misaligned prologues.
    standardDouble_fill(    x_standard+1,  yVal1, VSX_FUNC_NUM_TEST_ELEMENTS-2);
    THDoubleVector_fill_VSX(x_optimized+1, yVal1, VSX_FUNC_NUM_TEST_ELEMENTS-2);
    standardDouble_fill(    x_standard+2,  yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-4);
    THDoubleVector_fill_VSX(x_optimized+2, yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-4);
    standardDouble_fill(    x_standard+3,  yVal3, VSX_FUNC_NUM_TEST_ELEMENTS-6);
    THDoubleVector_fill_VSX(x_optimized+3, yVal3, VSX_FUNC_NUM_TEST_ELEMENTS-6);
    standardDouble_fill(    x_standard+517,  yVal0, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
    THDoubleVector_fill_VSX(x_optimized+517, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
    int r = rand() % 258;
    standardDouble_fill(    x_standard+517+r,  yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
    THDoubleVector_fill_VSX(x_optimized+517+r, yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)
        assert(x_optimized[i] == x_standard[i]);
    printf("All assertions PASSED for THDoubleVector_fill_VSX() test.\n\n");

    free(x_standard);
    free(x_optimized);
}

// Benchmark and verify THFloatVector_fill_VSX (float analogue of the above).
void test_THFloatVector_fill_VSX()
{
    clock_t start, end;
    double elapsedSeconds_optimized, elapsedSeconds_standard;

    float *x_standard  = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
    float *x_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));

    float yVal0 = 17.2;
    float yVal1 = 8.2;
    float yVal2 = 5.1;
    float yVal3 = -0.9;

    //-------------------------------------------------
    // Performance Test
    //-------------------------------------------------
    start = clock();
    standardFloat_fill(x_standard, yVal0,
VSX_PERF_NUM_TEST_ELEMENTS ); standardFloat_fill(x_standard, yVal1, VSX_PERF_NUM_TEST_ELEMENTS-1); standardFloat_fill(x_standard, yVal2, VSX_PERF_NUM_TEST_ELEMENTS-2); standardFloat_fill(x_standard, yVal3, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; printf("standardFloat_fill() test took %.5lf seconds\n", elapsedSeconds_standard); start = clock(); THFloatVector_fill_VSX(x_optimized, yVal0, VSX_PERF_NUM_TEST_ELEMENTS ); THFloatVector_fill_VSX(x_optimized, yVal1, VSX_PERF_NUM_TEST_ELEMENTS-1); THFloatVector_fill_VSX(x_optimized, yVal2, VSX_PERF_NUM_TEST_ELEMENTS-2); THFloatVector_fill_VSX(x_optimized, yVal3, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; printf("THFloatVector_fill_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); //------------------------------------------------- // Correctness Test //------------------------------------------------- yVal0 += 1.0; yVal1 += 1.0; yVal2 += 1.0; yVal3 -= 1.0; standardFloat_fill( x_standard, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS); THFloatVector_fill_VSX(x_optimized, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS); for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) assert(x_optimized[i] == yVal0); standardFloat_fill( x_standard+1, yVal1, VSX_FUNC_NUM_TEST_ELEMENTS-2); THFloatVector_fill_VSX(x_optimized+1, yVal1, VSX_FUNC_NUM_TEST_ELEMENTS-2); standardFloat_fill( x_standard+2, yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-4); THFloatVector_fill_VSX(x_optimized+2, yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-4); standardFloat_fill( x_standard+3, yVal3, VSX_FUNC_NUM_TEST_ELEMENTS-6); THFloatVector_fill_VSX(x_optimized+3, yVal3, VSX_FUNC_NUM_TEST_ELEMENTS-6); standardFloat_fill( x_standard+517, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS-1029); THFloatVector_fill_VSX(x_optimized+517, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS-1029); int r = rand() % 258; standardFloat_fill( x_standard+517+r, yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); 
THFloatVector_fill_VSX(x_optimized+517+r, yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) assert(x_optimized[i] == x_standard[i]); printf("All assertions PASSED for THFloatVector_fill_VSX() test.\n\n"); free(x_standard); free(x_optimized); } void test_THDoubleVector_cadd_VSX() { clock_t start, end; double elapsedSeconds_optimized, elapsedSeconds_standard; double *z_standard = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); double *z_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); double *x = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); double *y = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); double c = randDouble(); // Initialize randomly for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) { x[i] = randDouble(); y[i] = randDouble(); } //------------------------------------------------- // Performance Test //------------------------------------------------- start = clock(); standardDouble_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS ); standardDouble_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-1); standardDouble_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-2); standardDouble_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; printf("standardDouble_cadd() test took %.5lf seconds\n", elapsedSeconds_standard); start = clock(); THDoubleVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS ); THDoubleVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-1); THDoubleVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-2); THDoubleVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; printf("THDoubleVector_cadd_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); 
//------------------------------------------------- // Correctness Test //------------------------------------------------- standardDouble_cadd( z_standard+1, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); THDoubleVector_cadd_VSX(z_optimized+1, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); standardDouble_cadd( z_standard+2, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); THDoubleVector_cadd_VSX(z_optimized+2, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); standardDouble_cadd( z_standard+3, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); THDoubleVector_cadd_VSX(z_optimized+3, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); standardDouble_cadd( z_standard+517, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); THDoubleVector_cadd_VSX(z_optimized+517, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); int r = rand() % 258; standardDouble_cadd( z_standard+517+r, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); THDoubleVector_cadd_VSX(z_optimized+517+r, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) { if(!near(z_optimized[i], z_standard[i])) printf("%d %f %f\n", i, z_optimized[i], z_standard[i]); assert(near(z_optimized[i], z_standard[i])); } printf("All assertions PASSED for THDoubleVector_cadd_VSX() test.\n\n"); free(z_standard); free(z_optimized); free(x); } void test_THFloatVector_cadd_VSX() { clock_t start, end; double elapsedSeconds_optimized, elapsedSeconds_standard; float *z_standard = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); float *z_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); float *x = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); float *y = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); float c = (float)randDouble(); // Initialize randomly for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) { x[i] = (float)randDouble(); y[i] = (float)randDouble(); } //------------------------------------------------- // Performance Test //------------------------------------------------- start = clock(); 
standardFloat_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS ); standardFloat_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-1); standardFloat_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-2); standardFloat_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; printf("standardFloat_cadd() test took %.5lf seconds\n", elapsedSeconds_standard); start = clock(); THFloatVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS ); THFloatVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-1); THFloatVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-2); THFloatVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; printf("THFloatVector_cadd_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); //------------------------------------------------- // Correctness Test //------------------------------------------------- standardFloat_cadd( z_standard+1, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); THFloatVector_cadd_VSX(z_optimized+1, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); standardFloat_cadd( z_standard+2, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); THFloatVector_cadd_VSX(z_optimized+2, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); standardFloat_cadd( z_standard+3, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); THFloatVector_cadd_VSX(z_optimized+3, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); standardFloat_cadd( z_standard+517, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); THFloatVector_cadd_VSX(z_optimized+517, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); int r = rand() % 258; standardFloat_cadd( z_standard+517+r, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); THFloatVector_cadd_VSX(z_optimized+517+r, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) { if(!near(z_optimized[i], z_standard[i])) printf("%d %f %f\n", 
i, z_optimized[i], z_standard[i]); assert(near(z_optimized[i], z_standard[i])); } printf("All assertions PASSED for THFloatVector_cadd_VSX() test.\n\n"); free(z_standard); free(z_optimized); free(x); } void test_THDoubleVector_adds_VSX() { clock_t start, end; double elapsedSeconds_optimized, elapsedSeconds_standard; double *y_standard = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); double *y_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); double *x = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); double c = randDouble(); // Initialize randomly for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) x[i] = randDouble(); //------------------------------------------------- // Performance Test //------------------------------------------------- start = clock(); standardDouble_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); standardDouble_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); standardDouble_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); standardDouble_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; printf("standardDouble_adds() test took %.5lf seconds\n", elapsedSeconds_standard); start = clock(); THDoubleVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); THDoubleVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); THDoubleVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); THDoubleVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; printf("THDoubleVector_adds_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); //------------------------------------------------- // Correctness Test //------------------------------------------------- standardDouble_adds( y_standard+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); THDoubleVector_adds_VSX(y_optimized+1, x, c, 
                                                  VSX_FUNC_NUM_TEST_ELEMENTS-2);
    standardDouble_adds(    y_standard+2,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);
    THDoubleVector_adds_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);
    standardDouble_adds(    y_standard+3,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);
    THDoubleVector_adds_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);
    standardDouble_adds(    y_standard+517,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
    THDoubleVector_adds_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
    int r = rand() % 258;
    standardDouble_adds(    y_standard+517+r,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
    THDoubleVector_adds_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)
    {
        if(!near(y_optimized[i], y_standard[i]))
            printf("%d %f %f\n", i, y_optimized[i], y_standard[i]);
        assert(near(y_optimized[i], y_standard[i]));
    }
    printf("All assertions PASSED for THDoubleVector_adds_VSX() test.\n\n");

    free(y_standard);
    free(y_optimized);
    free(x);
}

// Benchmark and verify THFloatVector_adds_VSX (float analogue of the above).
void test_THFloatVector_adds_VSX()
{
    clock_t start, end;
    double elapsedSeconds_optimized, elapsedSeconds_standard;

    float *y_standard  = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
    float *y_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
    float *x = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
    float c = (float)randDouble();

    // Initialize randomly
    for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)
        x[i] = (float)randDouble();

    //-------------------------------------------------
    // Performance Test
    //-------------------------------------------------
    start = clock();
    standardFloat_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS  );
    standardFloat_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);
    standardFloat_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);
    standardFloat_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);
    end = clock();
    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;
    printf("standardFloat_adds() test took %.5lf seconds\n", elapsedSeconds_standard);

    start = clock();
    THFloatVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS  );
    THFloatVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);
    THFloatVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);
    THFloatVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);
    end = clock();
    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;
    printf("THFloatVector_adds_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized);

    //-------------------------------------------------
    // Correctness Test
    //-------------------------------------------------
    standardFloat_adds(    y_standard+1,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);
    THFloatVector_adds_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);
    standardFloat_adds(    y_standard+2,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);
    THFloatVector_adds_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);
    standardFloat_adds(    y_standard+3,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);
    THFloatVector_adds_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);
    standardFloat_adds(    y_standard+517,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
    THFloatVector_adds_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
    int r = rand() % 258;
    standardFloat_adds(    y_standard+517+r,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
    THFloatVector_adds_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)
    {
        if(!near(y_optimized[i], y_standard[i]))
            printf("%d %f %f\n", i, y_optimized[i], y_standard[i]);
        assert(near(y_optimized[i], y_standard[i]));
    }
    printf("All assertions PASSED for THFloatVector_adds_VSX() test.\n\n");

    free(y_standard);
    free(y_optimized);
    free(x);
}

// Benchmark and verify THDoubleVector_cmul_VSX: z = x * y elementwise.
void test_THDoubleVector_cmul_VSX()
{
    clock_t start, end;
    double elapsedSeconds_optimized, elapsedSeconds_standard;

    double *z_standard = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
    double
*z_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); double *x = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); double *y = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); // Initialize randomly for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) { x[i] = randDouble(); y[i] = randDouble(); } //------------------------------------------------- // Performance Test //------------------------------------------------- start = clock(); standardDouble_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS ); standardDouble_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1); standardDouble_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2); standardDouble_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; printf("standardDouble_cmul() test took %.5lf seconds\n", elapsedSeconds_standard); start = clock(); THDoubleVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS ); THDoubleVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1); THDoubleVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2); THDoubleVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; printf("THDoubleVector_cmul_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); //------------------------------------------------- // Correctness Test //------------------------------------------------- standardDouble_cmul( z_standard+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2); THDoubleVector_cmul_VSX(z_optimized+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2); standardDouble_cmul( z_standard+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4); THDoubleVector_cmul_VSX(z_optimized+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4); standardDouble_cmul( z_standard+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6); THDoubleVector_cmul_VSX(z_optimized+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6); standardDouble_cmul( 
z_standard+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029); THDoubleVector_cmul_VSX(z_optimized+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029); int r = rand() % 258; standardDouble_cmul( z_standard+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); THDoubleVector_cmul_VSX(z_optimized+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) { if(!near(z_optimized[i], z_standard[i])) printf("%d %f %f\n", i, z_optimized[i], z_standard[i]); assert(near(z_optimized[i], z_standard[i])); } printf("All assertions PASSED for THDoubleVector_cmul_VSX() test.\n\n"); free(z_standard); free(z_optimized); free(x); } void test_THFloatVector_cmul_VSX() { clock_t start, end; double elapsedSeconds_optimized, elapsedSeconds_standard; float *z_standard = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); float *z_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); float *x = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); float *y = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); // Initialize randomly for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) { x[i] = (float)randDouble(); y[i] = (float)randDouble(); } //------------------------------------------------- // Performance Test //------------------------------------------------- start = clock(); standardFloat_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS ); standardFloat_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1); standardFloat_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2); standardFloat_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; printf("standardFloat_cmul() test took %.5lf seconds\n", elapsedSeconds_standard); start = clock(); THFloatVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS ); THFloatVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1); THFloatVector_cmul_VSX(z_optimized, x, y, 
VSX_PERF_NUM_TEST_ELEMENTS-2); THFloatVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; printf("THFloatVector_cmul_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); //------------------------------------------------- // Correctness Test //------------------------------------------------- standardFloat_cmul( z_standard+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2); THFloatVector_cmul_VSX(z_optimized+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2); standardFloat_cmul( z_standard+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4); THFloatVector_cmul_VSX(z_optimized+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4); standardFloat_cmul( z_standard+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6); THFloatVector_cmul_VSX(z_optimized+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6); standardFloat_cmul( z_standard+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029); THFloatVector_cmul_VSX(z_optimized+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029); int r = rand() % 258; standardFloat_cmul( z_standard+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); THFloatVector_cmul_VSX(z_optimized+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) { if(!near(z_optimized[i], z_standard[i])) printf("%d %f %f\n", i, z_optimized[i], z_standard[i]); assert(near(z_optimized[i], z_standard[i])); } printf("All assertions PASSED for THFloatVector_cmul_VSX() test.\n\n"); free(z_standard); free(z_optimized); free(x); } void test_THDoubleVector_muls_VSX() { clock_t start, end; double elapsedSeconds_optimized, elapsedSeconds_standard; double *y_standard = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); double *y_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); double *x = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); double c = randDouble(); // Initialize randomly for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) { x[i] = randDouble(); } 
//------------------------------------------------- // Performance Test //------------------------------------------------- start = clock(); standardDouble_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); standardDouble_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); standardDouble_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); standardDouble_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; printf("standardDouble_muls() test took %.5lf seconds\n", elapsedSeconds_standard); start = clock(); THDoubleVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); THDoubleVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); THDoubleVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); THDoubleVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; printf("THDoubleVector_muls_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); //------------------------------------------------- // Correctness Test //------------------------------------------------- standardDouble_muls( y_standard+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); THDoubleVector_muls_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); standardDouble_muls( y_standard+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); THDoubleVector_muls_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); standardDouble_muls( y_standard+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); THDoubleVector_muls_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); standardDouble_muls( y_standard+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); THDoubleVector_muls_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); int r = rand() % 258; standardDouble_muls( y_standard+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); THDoubleVector_muls_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); for(int i 
= 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) { if(!near(y_optimized[i], y_standard[i])) printf("%d %f %f\n", i, y_optimized[i], y_standard[i]); assert(near(y_optimized[i], y_standard[i])); } printf("All assertions PASSED for THDoubleVector_muls_VSX() test.\n\n"); free(y_standard); free(y_optimized); free(x); } void test_THFloatVector_muls_VSX() { clock_t start, end; double elapsedSeconds_optimized, elapsedSeconds_standard; float *y_standard = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); float *y_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); float *x = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); float c = (float)randDouble(); // Initialize randomly for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) { x[i] = (float)randDouble(); } //------------------------------------------------- // Performance Test //------------------------------------------------- start = clock(); standardFloat_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); standardFloat_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); standardFloat_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); standardFloat_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; printf("standardFloat_muls() test took %.5lf seconds\n", elapsedSeconds_standard); start = clock(); THFloatVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); THFloatVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); THFloatVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); THFloatVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; printf("THFloatVector_muls_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); //------------------------------------------------- // Correctness Test //------------------------------------------------- standardFloat_muls( 
y_standard+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); THFloatVector_muls_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); standardFloat_muls( y_standard+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); THFloatVector_muls_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); standardFloat_muls( y_standard+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); THFloatVector_muls_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); standardFloat_muls( y_standard+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); THFloatVector_muls_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); int r = rand() % 258; standardFloat_muls( y_standard+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); THFloatVector_muls_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) { if(!near(y_optimized[i], y_standard[i])) printf("%d %f %f\n", i, y_optimized[i], y_standard[i]); assert(near(y_optimized[i], y_standard[i])); } printf("All assertions PASSED for THFloatVector_muls_VSX() test.\n\n"); free(y_standard); free(y_optimized); free(x); } void test_THDoubleVector_cdiv_VSX() { clock_t start, end; double elapsedSeconds_optimized, elapsedSeconds_standard; double *z_standard = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); double *z_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); double *x = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); double *y = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); // Initialize randomly for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) { x[i] = randDouble(); y[i] = randDouble(); } //------------------------------------------------- // Performance Test //------------------------------------------------- start = clock(); standardDouble_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS ); standardDouble_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1); standardDouble_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2); 
standardDouble_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; printf("standardDouble_cdiv() test took %.5lf seconds\n", elapsedSeconds_standard); start = clock(); THDoubleVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS ); THDoubleVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1); THDoubleVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2); THDoubleVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; printf("THDoubleVector_cdiv_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); //------------------------------------------------- // Correctness Test //------------------------------------------------- standardDouble_cdiv( z_standard+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2); THDoubleVector_cdiv_VSX(z_optimized+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2); standardDouble_cdiv( z_standard+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4); THDoubleVector_cdiv_VSX(z_optimized+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4); standardDouble_cdiv( z_standard+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6); THDoubleVector_cdiv_VSX(z_optimized+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6); standardDouble_cdiv( z_standard+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029); THDoubleVector_cdiv_VSX(z_optimized+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029); int r = rand() % 258; standardDouble_cdiv( z_standard+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); THDoubleVector_cdiv_VSX(z_optimized+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) { if(!near(z_optimized[i], z_standard[i])) printf("%d %f %f\n", i, z_optimized[i], z_standard[i]); assert(near(z_optimized[i], z_standard[i])); } printf("All assertions PASSED for THDoubleVector_cdiv_VSX() test.\n\n"); free(z_standard); free(z_optimized); free(x); } void test_THFloatVector_cdiv_VSX() { 
clock_t start, end; double elapsedSeconds_optimized, elapsedSeconds_standard; float *z_standard = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); float *z_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); float *x = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); float *y = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); // Initialize randomly for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) { x[i] = (float)randDouble(); y[i] = (float)randDouble(); } //------------------------------------------------- // Performance Test //------------------------------------------------- start = clock(); standardFloat_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS ); standardFloat_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1); standardFloat_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2); standardFloat_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; printf("standardFloat_cdiv() test took %.5lf seconds\n", elapsedSeconds_standard); start = clock(); THFloatVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS ); THFloatVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1); THFloatVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2); THFloatVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; printf("THFloatVector_cdiv_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); //------------------------------------------------- // Correctness Test //------------------------------------------------- standardFloat_cdiv( z_standard+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2); THFloatVector_cdiv_VSX(z_optimized+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2); standardFloat_cdiv( z_standard+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4); THFloatVector_cdiv_VSX(z_optimized+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4); 
standardFloat_cdiv( z_standard+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6); THFloatVector_cdiv_VSX(z_optimized+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6); standardFloat_cdiv( z_standard+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029); THFloatVector_cdiv_VSX(z_optimized+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029); int r = rand() % 258; standardFloat_cdiv( z_standard+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); THFloatVector_cdiv_VSX(z_optimized+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) { if(!near(z_optimized[i], z_standard[i])) printf("%d %f %f\n", i, z_optimized[i], z_standard[i]); assert(near(z_optimized[i], z_standard[i])); } printf("All assertions PASSED for THFloatVector_cdiv_VSX() test.\n\n"); free(z_standard); free(z_optimized); free(x); } void test_THDoubleVector_divs_VSX() { clock_t start, end; double elapsedSeconds_optimized, elapsedSeconds_standard; double *y_standard = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); double *y_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); double *x = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); double c = randDouble(); // Initialize randomly for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) { x[i] = randDouble(); } //------------------------------------------------- // Performance Test //------------------------------------------------- start = clock(); standardDouble_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); standardDouble_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); standardDouble_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); standardDouble_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; printf("standardDouble_divs() test took %.5lf seconds\n", elapsedSeconds_standard); start = clock(); THDoubleVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); 
THDoubleVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); THDoubleVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); THDoubleVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; printf("THDoubleVector_divs_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); //------------------------------------------------- // Correctness Test //------------------------------------------------- standardDouble_divs( y_standard+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); THDoubleVector_divs_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); standardDouble_divs( y_standard+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); THDoubleVector_divs_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); standardDouble_divs( y_standard+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); THDoubleVector_divs_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); standardDouble_divs( y_standard+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); THDoubleVector_divs_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); int r = rand() % 258; standardDouble_divs( y_standard+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); THDoubleVector_divs_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) { if(!near(y_optimized[i], y_standard[i])) printf("%d %f %f\n", i, y_optimized[i], y_standard[i]); assert(near(y_optimized[i], y_standard[i])); } printf("All assertions PASSED for THDoubleVector_divs_VSX() test.\n\n"); free(y_standard); free(y_optimized); free(x); } void test_THFloatVector_divs_VSX() { clock_t start, end; double elapsedSeconds_optimized, elapsedSeconds_standard; float *y_standard = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); float *y_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); float *x = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); float c = 
(float)randDouble(); // Initialize randomly for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) { x[i] = (float)randDouble(); } //------------------------------------------------- // Performance Test //------------------------------------------------- start = clock(); standardFloat_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); standardFloat_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); standardFloat_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); standardFloat_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; printf("standardFloat_divs() test took %.5lf seconds\n", elapsedSeconds_standard); start = clock(); THFloatVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); THFloatVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); THFloatVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); THFloatVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); end = clock(); elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; printf("THFloatVector_divs_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); //------------------------------------------------- // Correctness Test //------------------------------------------------- standardFloat_divs( y_standard+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); THFloatVector_divs_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); standardFloat_divs( y_standard+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); THFloatVector_divs_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); standardFloat_divs( y_standard+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); THFloatVector_divs_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); standardFloat_divs( y_standard+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); THFloatVector_divs_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); int r = rand() % 258; standardFloat_divs( y_standard+517+r, x, c, 
VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); THFloatVector_divs_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) { if(!near(y_optimized[i], y_standard[i])) printf("%d %f %f\n", i, y_optimized[i], y_standard[i]); assert(near(y_optimized[i], y_standard[i])); } printf("All assertions PASSED for THFloatVector_divs_VSX() test.\n\n"); free(y_standard); free(y_optimized); free(x); } //-------------------------------------------------------------------------------------------------- // Run tests: //-------------------------------------------------------------------------------------------------- int main() { printf("\n"); // First test utility functions assert(!near(0.1, -0.1)); assert(!near(0.1f, -0.1f)); assert(!near(9, 10)); assert(near(0.1, 0.1000001)); assert(near(0.1f, 0.1000001f)); assert(near(100.764, 100.764)); assert(!near(NAN, 0.0)); assert(!near(-9.5, NAN)); assert(!near(NAN, 100)); assert(!near(-0.0, NAN)); assert(near(NAN, NAN)); assert(near(INFINITY, INFINITY)); assert(near(-INFINITY, -INFINITY)); assert(!near(INFINITY, NAN)); assert(!near(0, INFINITY)); assert(!near(-999.4324, INFINITY)); assert(!near(INFINITY, 982374.1)); assert(!near(-INFINITY, INFINITY)); // Then test each vectorized function test_THDoubleVector_fill_VSX(); test_THFloatVector_fill_VSX(); test_THDoubleVector_cadd_VSX(); test_THFloatVector_cadd_VSX(); test_THDoubleVector_adds_VSX(); test_THFloatVector_adds_VSX(); test_THDoubleVector_cmul_VSX(); test_THFloatVector_cmul_VSX(); test_THDoubleVector_muls_VSX(); test_THFloatVector_muls_VSX(); test_THDoubleVector_cdiv_VSX(); test_THFloatVector_cdiv_VSX(); test_THDoubleVector_divs_VSX(); test_THFloatVector_divs_VSX(); printf("Finished running all tests. 
All tests PASSED.\n"); return 0; } #endif // defined RUN_VSX_TESTS #endif // defined __PPC64__ lib/luaT/000077500000000000000000000000001316246254300125375ustar00rootroot00000000000000lib/luaT/CMakeLists.txt000066400000000000000000000030221316246254300152740ustar00rootroot00000000000000# avoid some cmake warnings IF(POLICY CMP0026) CMAKE_POLICY(SET CMP0026 OLD) ENDIF() INCLUDE_DIRECTORIES(${LUA_INCDIR}) IF(LUALIB) LINK_DIRECTORIES(${LUA_LIBDIR}) # note: must be done before defining target ENDIF() ADD_LIBRARY(luaT SHARED luaT.h luaT.c) IF (BUILD_STATIC OR "$ENV{STATIC_TH}" STREQUAL "YES") ADD_LIBRARY(luaT_static STATIC luaT.h luaT.c) SET_TARGET_PROPERTIES(luaT_static PROPERTIES COMPILE_FLAGS "-fPIC") SET_TARGET_PROPERTIES(luaT_static PROPERTIES PREFIX "lib" IMPORT_PREFIX "lib" OUTPUT_NAME "luaT") ENDIF() SET_TARGET_PROPERTIES(luaT PROPERTIES VERSION 0 SOVERSION 0) IF(APPLE) SET_TARGET_PROPERTIES(luaT PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") ENDIF() IF(LUALIB) TARGET_LINK_LIBRARIES(luaT ${LUALIB}) # must be done after ;) ENDIF() INSTALL(TARGETS luaT EXPORT torch-exports RUNTIME DESTINATION "${Torch_INSTALL_BIN_SUBDIR}" LIBRARY DESTINATION "${Torch_INSTALL_LIB_SUBDIR}" ARCHIVE DESTINATION "${Torch_INSTALL_LIB_SUBDIR}") INSTALL(FILES luaT.h DESTINATION "${Torch_INSTALL_INCLUDE_SUBDIR}") # Create luaT.cmake GET_TARGET_PROPERTY(LUAT_OUTPUT_NAME luaT LOCATION) GET_FILENAME_COMPONENT(LUAT_OUTPUT_NAME ${LUAT_OUTPUT_NAME} NAME) SET(LUAT_LIBRARIES "${Torch_INSTALL_LIB}/${LUAT_OUTPUT_NAME}") SET(LUAT_INCLUDE_DIR "${Torch_INSTALL_INCLUDE}") CONFIGURE_FILE(luaTConfig.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/luaTConfig.cmake") INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/luaTConfig.cmake" DESTINATION "${Torch_INSTALL_CMAKE_SUBDIR}") lib/luaT/README.md000066400000000000000000000265421316246254300140270ustar00rootroot00000000000000 # Lua Torch C API # luaT provides an API to interface Lua and C in Torch packages. 
It defines a concept of _classes_ to Lua for Torch, and provides a mechanism to easily handle these Lua classes from C. It additionally provides few functions that `luaL` should have defined, and defines several functions similar to `luaL` ones for better type error printing when using `luaT` classes. ## Memory functions ## Classical memory allocation functions which generate a Lua error in case of problem. ### void* luaT_alloc(lua_State *L, long size) ### Allocates `size` bytes, and return a pointer on the allocated memory. A Lua error will be generated if running out of memory. ### void* luaT_realloc(lua_State *L, void *ptr, long size) ### Realloc `ptr` to `size` bytes. `ptr` must have been previously allocated with [luaT_alloc](#luaT_alloc) or [luaT_realloc](#luaT_realloc), or the C `malloc` or `realloc` functions. A Lua error will be generated if running out of memory. ### void luaT_free(lua_State *L, void *ptr) ### Free memory allocated at address `ptr`. The memory must have been previously allocated with [luaT_alloc](#luaT_alloc) or [luaT_realloc](#luaT_realloc), or the C `malloc` or `realloc` functions. ## Class creation and basic handling ## A `luaT` class is basically either a Lua _table_ or _userdata_ with an appropriate _metatable_. This appropriate metatable is created with [luaT_newmetatable](#luaT_newmetatable). Contrary to luaL userdata functions, luaT mechanism handles inheritance. If the class inherit from another class, then the metatable will itself have a metatable corresponding to the _parent metatable_: the metatables are cascaded according to the class inheritance. Multiple inheritance is not supported. ### Operator overloading ### The metatable of a `luaT` object contains `Lua` operators like `__index`, `__newindex`, `__tostring`, `__add` (etc...). These operators will respectively look for `__index__`, `__newindex__`, `__tostring__`, `__add__` (etc...) in the metatable. 
If found, the corresponding function or value will be returned, else a Lua error will be raised. If one wants to provide `__index__` or `__newindex__` in the metaclass, these operators must follow a particular scheme: * `__index__` must either return a value _and_ `true` or return `false` only. In the first case, it means `__index__` was able to handle the given argument (for e.g., the type was correct). The second case means it was not able to do anything, so `__index` in the root metatable can then try to see if the metaclass contains the required value. * `__newindex__` must either return `true` or `false`. As for `__index__`, `true` means it could handle the argument and `false` not. If not, the root metatable `__newindex` will then raise an error if the object was a userdata, or apply a rawset if the object was a Lua table. Other metaclass operators like `__tostring__`, `__add__`, etc... do not have any particular constraint. ### const char* luaT_newlocalmetatable(lua_State *L, const char *tname, const char *parenttname, lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory, int moduleidx) ### This function creates a new metatable, which is the Lua way to define a new object class. As for `luaL_newmetatable`, the metatable is registered in the Lua registry table, with the key `tname`. In addition, `tname` is also registered in the Lua registry, with the metatable as key (the typename of a given object can be thus easily retrieved). The class name `tname` must be of the form `modulename.classname`. If not NULL, `parenttname` must be a valid typename corresponding to the parent class of the new class. If `constructor` is not NULL, a function `new` will be added to the metatable, pointing to this given function. A "constructor table" will be created by `luaT_newlocalmetatable`: it will contain all the class methods, and be callable, calling the `constructor`, if a `constructor` has been passed. 
The constructor table is either stored into `modulename.classname` (that is in the global namespace) if `moduleidx <= 0` or in the table at index `moduleidx` in the stack (if `moduleidx > 0`). If not NULL, `destructor` will be called when garbage collecting the object. If not NULL, `factory` must be a Lua C function creating an empty object instance of the class. This functions are used in Torch for serialization. Note that classes can be partly defined in C and partly defined in Lua: once the metatable is created in C, it can be filled up with additional methods in Lua. The return value is the value returned by [luaT_typenameid](#luat_typenameid). ### const char* luaT_newmetatable(lua_State *L, const char *tname, const char *parenttname, lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory) ### Same as [luaT_newlocalmetatable](#luat_newmetatable), but where the constructor table is assigned in the global namespace (`moduleidx = 0`). ### int luaT_pushmetatable(lua_State *L, const name *tname) ### Push the metatable with type name `tname` on the stack, if `tname` is a valid Torch class name (previously registered with luaT_newmetatable). On success, returns 1. If `tname` is invalid, nothing is pushed and it returns 0. ### const char* luaT_typenameid(lua_State *L, const char *tname) ### If `tname` is a valid Torch class name, then returns a unique string (the contents will be the same as `tname`) pointing to the string registered in the Lua registry. This string is thus valid as long as Lua is running. The returned string shall not be freed. If `tname` is an invalid class name, returns NULL. ### const char* luaT_typename(lua_State *L, int ud) ### Returns the typename of the object at index `ud` on the stack. If it is not a valid Torch object, returns NULL. ### void luaT_pushudata(lua_State *L, void *udata, const char *tname) ### Given a C structure `udata`, push a userdata object on the stack with metatable corresponding to `tname`. 
Obviously, `tname` must be a valid Torch name registered with [luaT_newmetatable](#luat_newmetatable). ### void *luaT_toudata(lua_State *L, int ud, const char *tname) ### Returns a pointer to the original C structure previously pushed on the stack with [luaT_pushudata](#luat_pushudata), if the object at index `ud` is a valid Torch class name. Returns NULL otherwise. ### int luaT_isudata(lua_State *L, int ud, const char *tname) ### Returns 1 if the object at index `ud` on the stack is a valid Torch class name `tname`. Returns 0 otherwise. ### Checking fields of a table ### This functions check that the table at the given index `ud` on the Lua stack has a field named `field`, and that it is of the specified type. These function raises a Lua error on failure. ## void *luaT_getfieldcheckudata(lua_State *L, int ud, const char *field, const char *tname) ## Checks that the field named `field` of the table at index `ud` is a Torch class name `tname`. Returns the pointer of the C structure previously pushed on the stack with [luaT_pushudata](#luat_pushudata) on success. The function raises a Lua error on failure. ## void *luaT_getfieldchecklightudata(lua_State *L, int ud, const char *field) ## Checks that the field named `field` of the table at index `ud` is a lightuserdata. Returns the lightuserdata pointer on success. The function raises a Lua error on failure. ## int luaT_getfieldcheckint(lua_State *L, int ud, const char *field) ## Checks that the field named `field` of the table at index `ud` is an int. Returns the int value pointer on success. The function raises a Lua error on failure. ## const char* luaT_getfieldcheckstring(lua_State *L, int ud, const char *field) ## Checks that the field named `field` of the table at index `ud` is a string. Returns a pointer to the string on success. The function raises a Lua error on failure. 
## int luaT_getfieldcheckboolean(lua_State *L, int ud, const char *field) ## Checks that the field named `field` of the table at index `ud` is a boolean. On success, returns 1 if the boolean is `true`, 0 if it is `false`. The function raises a Lua error on failure. ## void luaT_getfieldchecktable(lua_State *L, int ud, const char *field) ## Checks that the field named `field` of the table at index `ud` is a table. On success, push the table on the stack. The function raises a Lua error on failure. ### int luaT_typerror(lua_State *L, int ud, const char *tname) ### Raises a `luaL_argerror` (and returns its value), claiming that the object at index `ud` on the stack is not of type `tname`. Note that this function does not check the type, it only raises an error. ### int luaT_checkboolean(lua_State *L, int ud) ### Checks that the value at index `ud` is a boolean. On success, returns 1 if the boolean is `true`, 0 if it is `false`. The function raises a Lua error on failure. ### int luaT_optboolean(lua_State *L, int ud, int def) ### Checks that the value at index `ud` is a boolean. On success, returns 1 if the boolean is `true`, 0 if it is `false`. If there is no value at index `ud`, returns `def`. In any other cases, raises an error. ### void luaT_registeratname(lua_State *L, const struct luaL_Reg *methods, const char *name) ### This function assume a table is on the stack. It creates a table field `name` in the table (if this field does not exist yet), and fill up `methods` in this table field. ### const char *luaT_classrootname(const char *tname) ### Assuming `tname` is of the form `A.b.c`, returns 'c'. The returned value shall not be freed. It is a pointer inside `tname` string. ### int luaT_classmodulename(const char *tname, char *parent_name) ### Alias to `luaT_fullparentname ` for ensuring backwards compatibility; use of `luaT_fullparentname` is preferred. 
### int luaT_fullparentname(const char *tname, char *parent_name) ### Returns a 0-1 valued integer indicating whether `tname` has a parent module. Assuming `tname` is of the form `A.b.c`, sets `parent_name` to `A.b`. ### int luaT_outerparentname(const char *tname, char *parent_name) ### Returns a 0-1 valued integer indicating whether `tname` has a parent module. Assuming `tname` is of the form `A.b.c`, sets `parent_name` to `A`. ### int luaT_innerparentname(const char *tname, char *parent_name) ### Returns a 0-1 valued integer indicating whether `tname` has a parent module. Assuming `tname` is of the form `A.b.c`, sets `parent_name` to `b`. ### void luaT_stackdump(lua_State *L) ### This function print outs the state of the Lua stack. It is useful for debug purposes. lib/luaT/luaT.c000066400000000000000000001130631316246254300136140ustar00rootroot00000000000000#include #include #include #include "luaT.h" void* luaT_alloc(lua_State *L, ptrdiff_t size) { void *ptr; if(size == 0) return NULL; if(size < 0) luaL_error(L, "$ Torch: invalid memory size -- maybe an overflow?"); ptr = malloc(size); if(!ptr) luaL_error(L, "$ Torch: not enough memory: you tried to allocate %dGB. Buy new RAM!", size/1073741824); return ptr; } void* luaT_realloc(lua_State *L, void *ptr, ptrdiff_t size) { if(!ptr) return(luaT_alloc(L, size)); if(size == 0) { luaT_free(L, ptr); return NULL; } if(size < 0) luaL_error(L, "$ Torch: invalid memory size -- maybe an overflow?"); ptr = realloc(ptr, size); if(!ptr) luaL_error(L, "$ Torch: not enough memory: you tried to reallocate %dGB. 
Buy new RAM!", size/1073741824); return ptr; } void luaT_free(lua_State *L, void *ptr) { free(ptr); } void luaT_setfuncs(lua_State *L, const luaL_Reg *l, int nup) { #if LUA_VERSION_NUM == 501 luaL_checkstack(L, nup+1, "too many upvalues"); for (; l->name != NULL; l++) { /* fill the table with given functions */ int i; lua_pushstring(L, l->name); for (i = 0; i < nup; i++) /* copy upvalues to the top */ lua_pushvalue(L, -(nup+1)); lua_pushcclosure(L, l->func, nup); /* closure with those upvalues */ lua_settable(L, -(nup + 3)); } lua_pop(L, nup); /* remove upvalues */ #else luaL_setfuncs(L, l, nup); #endif } void luaT_stackdump(lua_State *L) { int i; const char *tname = NULL; int top = lua_gettop(L); for(i = 1; i <= top; i++) { int t = lua_type(L, i); printf("%3d. ", i); switch(t) { case LUA_TSTRING: printf("'%s'", lua_tostring(L,i)); break; case LUA_TBOOLEAN: printf(lua_toboolean(L, i) ? "true" : "false"); break; case LUA_TNUMBER: printf("%g", lua_tonumber(L,i)); break; case LUA_TUSERDATA: tname = luaT_typename(L, i); printf("userdata %p [%s]", lua_topointer(L, i), (tname ? tname : "not a Torch object")); break; case 10: tname = luaT_typename(L, i); printf("cdata %p [%s]", lua_topointer(L, i), (tname ? tname : "not a Torch object")); break; case LUA_TTABLE: lua_pushvalue(L, i); lua_rawget(L, LUA_REGISTRYINDEX); if(lua_isstring(L, -1)) tname = lua_tostring(L, -1); /*luaT_typenameid(L, lua_tostring(L, -1)); */ else tname = NULL; lua_pop(L, 1); if(tname) printf("metatable [%s]", tname); else { tname = luaT_typename(L, i); printf("table %p [%s]", lua_topointer(L, i), (tname ? 
tname : "not a Torch object")); } break; default: printf("Lua object type: %s", lua_typename(L,t)); break; } printf("\n"); } printf("---------------------------------------------\n"); } /* metatable operator methods */ static int luaT_mt__index(lua_State *L); static int luaT_mt__newindex(lua_State *L); static int luaT_mt__tostring(lua_State *L); static int luaT_mt__add(lua_State *L); static int luaT_mt__sub(lua_State *L); static int luaT_mt__mul(lua_State *L); static int luaT_mt__div(lua_State *L); static int luaT_mt__mod(lua_State *L); static int luaT_mt__pow(lua_State *L); static int luaT_mt__unm(lua_State *L); static int luaT_mt__concat(lua_State *L); static int luaT_mt__len(lua_State *L); static int luaT_mt__eq(lua_State *L); static int luaT_mt__lt(lua_State *L); static int luaT_mt__le(lua_State *L); static int luaT_mt__call(lua_State *L); /* Constructor-metatable methods */ static int luaT_cmt__call(lua_State *L); static int luaT_cmt__newindex(lua_State *L); const char* luaT_newmetatable(lua_State *L, const char *tname, const char *parent_tname, lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory) { return luaT_newlocalmetatable(L, tname, parent_tname, constructor, destructor, factory, 0); } const char* luaT_newlocalmetatable(lua_State *L, const char *tname, const char *parent_tname, lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory, int moduleidx) { lua_pushcfunction(L, luaT_lua_newmetatable); lua_pushstring(L, tname); (parent_tname ? (void)lua_pushstring(L, parent_tname) : lua_pushnil(L)); (constructor ? lua_pushcfunction(L, constructor) : lua_pushnil(L)); (destructor ? lua_pushcfunction(L, destructor) : lua_pushnil(L)); (factory ? lua_pushcfunction(L, factory) : lua_pushnil(L)); (moduleidx > 0 ? 
lua_pushvalue(L, moduleidx) : lua_pushnil(L)); lua_call(L, 6, 1); return luaT_typenameid(L, tname); } int luaT_pushmetatable(lua_State *L, const char *tname) { lua_getfield(L, LUA_REGISTRYINDEX, tname); if(lua_isnil(L, -1)) { lua_pop(L, 1); return 0; } return 1; } const char *luaT_typenameid(lua_State *L, const char *tname) { if(luaT_pushmetatable(L, tname)) { const char *tnameid = NULL; lua_rawget(L, LUA_REGISTRYINDEX); if(lua_isstring(L, -1)) tnameid = lua_tostring(L, -1); lua_pop(L, 1); /* the string/nil */ return tnameid; } return NULL; } static const char cdataname[] = "" "local ok, ffi = pcall(require, 'ffi')\n" "if ok then\n" " local id2name = {}\n" " return function(cdata, name)\n" " local id\n" " if jit then\n" " id = tonumber(ffi.typeof(cdata))\n" " else\n" " id = tostring(ffi.typeof(cdata))\n" " end\n" " if id then\n" " if name then\n" " id2name[id] = name\n" " return name\n" " else\n" " return rawget(id2name, id)\n" " end\n" " end\n" " return nil\n" " end\n" "else\n" " return function() end\n" "end\n"; static const char* luaT_cdataname(lua_State *L, int ud, const char *tname) { lua_pushstring(L, "__cdataname"); lua_rawget(L, LUA_REGISTRYINDEX); if(lua_isnil(L,-1)) { lua_pop(L, 1); if(luaL_dostring(L, cdataname)) /* did something go wrong? */ luaL_error(L, "internal error (could not load cdataname): %s", lua_tostring(L, -1)); lua_pushstring(L, "__cdataname"); lua_pushvalue(L, -2); lua_rawset(L, LUA_REGISTRYINDEX); } if(!lua_isfunction(L, -1)) /* should not happen */ luaL_error(L, "internal error (cdataname is not a function)"); lua_pushvalue(L, ud); if(tname) lua_pushstring(L, tname); if(lua_pcall(L, (tname ? 
2 : 1), 1, 0)) luaL_error(L, "internal error (cdataname): %s", lua_tostring(L, -1)); tname = lua_tostring(L, -1); lua_pop(L, 1); return tname; } static void* CDATA_MT_KEY = &CDATA_MT_KEY; static const char cdatamt[] = "" "local ok, ffi = pcall(require, 'ffi')\n" "if ok and not jit then\n" " return ffi.debug().cdata_mt\n" "else\n" " return {}\n" "end\n"; static int luaT_iscdata(lua_State *L, int ud) { int type = lua_type(L, ud); if(type == 10) return 1; if(type != LUA_TUSERDATA) return 0; if(!lua_getmetatable(L, ud)) return 0; lua_pushlightuserdata(L, CDATA_MT_KEY); lua_rawget(L, LUA_REGISTRYINDEX); if (lua_isnil(L, -1)) { // initialize cdata metatable lua_pop(L, 1); if(luaL_dostring(L, cdatamt)) luaL_error(L, "internal error (could not load cdata mt): %s", lua_tostring(L, -1)); lua_pushlightuserdata(L, CDATA_MT_KEY); lua_pushvalue(L, -2); lua_rawset(L, LUA_REGISTRYINDEX); } int iscdata = lua_rawequal(L, -1, -2); lua_pop(L, 2); return iscdata; } const char* luaT_typename(lua_State *L, int ud) { if(luaT_iscdata(L, ud)) return luaT_cdataname(L, ud, NULL); else if(lua_getmetatable(L, ud)) { const char *tname = NULL; lua_rawget(L, LUA_REGISTRYINDEX); if(lua_isstring(L, -1)) tname = lua_tostring(L, -1); lua_pop(L, 1); /* the string/nil */ return tname; } return NULL; } void luaT_pushudata(lua_State *L, void *udata, const char *tname) { if(udata) { void **udata_p = lua_newuserdata(L, sizeof(void*)); *udata_p = udata; if(!luaT_pushmetatable(L, tname)) luaL_error(L, "Torch internal problem: cannot find metatable for type <%s>", tname); lua_setmetatable(L, -2); } else lua_pushnil(L); } void *luaT_toudata(lua_State *L, int ud, const char *tname) { void **p = lua_touserdata(L, ud); if(p != NULL) /* value is a userdata? 
*/ { if(!luaT_pushmetatable(L, tname)) luaL_error(L, "Torch internal problem: cannot find metatable for type <%s>", tname); /* initialize the table we want to get the metatable on */ /* note that we have to be careful with indices, as we just inserted stuff */ lua_pushvalue(L, (ud < 0 ? ud - 1 : ud)); while(lua_getmetatable(L, -1)) /* get the next metatable */ { lua_remove(L, -2); /* remove the previous metatable [or object, if first time] */ if(lua_rawequal(L, -1, -2)) { lua_pop(L, 2); /* remove the two metatables */ return *p; } } lua_pop(L, 2); /* remove the two metatables */ } return NULL; } int luaT_isudata(lua_State *L, int ud, const char *tname) { if(luaT_toudata(L, ud, tname)) return 1; else return 0; } void *luaT_checkudata(lua_State *L, int ud, const char *tname) { void *p = luaT_toudata(L, ud, tname); if(!p) luaT_typerror(L, ud, tname); return p; } void luaT_pushlong(lua_State *L, long n) { #if LUA_VERSION_NUM >= 503 /* Only push the value as an integer if it fits in lua_Integer, or if the lua_Number representation will be even worse */ if (sizeof(lua_Integer) >= sizeof(long) || sizeof(lua_Number) <= sizeof(lua_Integer)) { lua_pushinteger(L, n); } else { lua_pushnumber(L, (lua_Number)n); } #else lua_pushnumber(L, (lua_Number)n); #endif } long luaT_checklong(lua_State *L, int idx) { #if LUA_VERSION_NUM >= 503 if (sizeof(lua_Integer) >= sizeof(long) || sizeof(lua_Number) <= sizeof(lua_Integer)) { return (long)luaL_checkinteger(L, idx); } else { return (long)luaL_checknumber(L, idx); } #else return (long)luaL_checknumber(L, idx); #endif } long luaT_tolong(lua_State *L, int idx) { #if LUA_VERSION_NUM == 503 if (sizeof(lua_Integer) >= sizeof(long) || sizeof(lua_Number) <= sizeof(lua_Integer)) { return (long)lua_tointeger(L, idx); } else { return (long)lua_tonumber(L, idx); } #else return (long)lua_tonumber(L, idx); #endif } void luaT_pushinteger(lua_State *L, ptrdiff_t n) { #if LUA_VERSION_NUM >= 503 /* Only push the value as an integer if it fits in 
lua_Integer, or if the lua_Number representation will be even worse */ if (sizeof(lua_Integer) >= sizeof(ptrdiff_t) || sizeof(lua_Number) <= sizeof(lua_Integer)) { lua_pushinteger(L, n); } else { lua_pushnumber(L, (lua_Number)n); } #else lua_pushnumber(L, (lua_Number)n); #endif } ptrdiff_t luaT_checkinteger(lua_State *L, int idx) { #if LUA_VERSION_NUM >= 503 if (sizeof(lua_Integer) >= sizeof(ptrdiff_t) || sizeof(lua_Number) <= sizeof(lua_Integer)) { return (ptrdiff_t)luaL_checkinteger(L, idx); } else { return (ptrdiff_t)luaL_checknumber(L, idx); } #else return (ptrdiff_t)luaL_checknumber(L, idx); #endif } void *luaT_getfieldcheckudata(lua_State *L, int ud, const char *field, const char *tname) { void *p; lua_getfield(L, ud, field); if(lua_isnil(L, -1)) luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field); p = luaT_toudata(L, -1, tname); if(!p) luaL_error(L, "bad argument #%d (field %s is not a %s)", ud, field, tname); return p; } void *luaT_getfieldchecklightudata(lua_State *L, int ud, const char *field) { void *p; lua_getfield(L, ud, field); if(lua_isnil(L, -1)) luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field); if(!lua_islightuserdata(L, -1)) luaL_error(L, "bad argument #%d (field %s is not a light userdata)", ud, field); p = lua_touserdata(L, -1); return p; } double luaT_getfieldchecknumber(lua_State *L, int ud, const char *field) { lua_getfield(L, ud, field); if(lua_isnil(L, -1)) luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field); if(!lua_isnumber(L, -1)) luaL_error(L, "bad argument #%d (field %s is not a number)", ud, field); return lua_tonumber(L, -1); } int luaT_getfieldcheckint(lua_State *L, int ud, const char *field) { lua_getfield(L, ud, field); if(lua_isnil(L, -1)) luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field); if(!lua_isnumber(L, -1)) luaL_error(L, "bad argument #%d (field %s is not a number)", ud, field); return (int)lua_tonumber(L, -1); } const char* 
luaT_getfieldcheckstring(lua_State *L, int ud, const char *field) { lua_getfield(L, ud, field); if(lua_isnil(L, -1)) luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field); if(!lua_isstring(L, -1)) luaL_error(L, "bad argument #%d (field %s is not a string)", ud, field); return lua_tostring(L, -1); } int luaT_getfieldcheckboolean(lua_State *L, int ud, const char *field) { lua_getfield(L, ud, field); if(lua_isnil(L, -1)) luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field); if(!lua_isboolean(L, -1)) luaL_error(L, "bad argument #%d (field %s is not a boolean)", ud, field); return lua_toboolean(L, -1); } void luaT_getfieldchecktable(lua_State *L, int ud, const char *field) { lua_getfield(L, ud, field); if(lua_isnil(L, -1)) luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field); if(!lua_istable(L, -1)) luaL_error(L, "bad argument #%d (field %s is not a table)", ud, field); } /**** type checks as in luaL ****/ int luaT_typerror(lua_State *L, int ud, const char *tname) { const char *msg; const char *tnameud = luaT_typename(L, ud); if(!tnameud) tnameud = lua_typename(L, ud); msg = lua_pushfstring(L, "%s expected, got %s", tname, (tnameud ? 
tnameud : "unknown object")); return luaL_argerror(L, ud, msg); } int luaT_checkboolean(lua_State *L, int ud) { if(!lua_isboolean(L, ud)) luaT_typerror(L, ud, lua_typename(L, LUA_TBOOLEAN)); return lua_toboolean(L, ud); } int luaT_optboolean(lua_State *L, int ud, int def) { if(lua_isnoneornil(L,ud)) return def; return luaT_checkboolean(L, ud); } void luaT_registeratname(lua_State *L, const struct luaL_Reg *methods, const char *name) { int idx = lua_gettop(L); luaL_checktype(L, idx, LUA_TTABLE); lua_pushstring(L, name); lua_rawget(L, idx); if(lua_isnil(L, -1)) { lua_pop(L, 1); lua_pushstring(L, name); lua_newtable(L); lua_rawset(L, idx); lua_pushstring(L, name); lua_rawget(L, idx); } luaT_setfuncs(L, methods, 0); lua_pop(L, 1); } /* returns the name of the class itself (sans nesting) */ const char* luaT_classrootname(const char *tname) { int idx; int sz = strlen(tname); for(idx = sz-1; idx >= 0 ; idx--) { if(tname[idx] == '.') return tname+idx+1; } return tname; } /* parent_name must be a buffer at least as big as tname. * If class has a parent, returns true; and, sets * parent name to that of full parent hierarchy (e.g. * given class `A.b.c`, sets parent_name to `A.b`) */ int luaT_fullparentname(const char *tname, char *parent_name) { int sz = strlen(tname); int idx; for(idx = sz-1; idx > 0 ; idx--) if(tname[idx] == '.' || tname[idx] == '\0') break; if (idx > 0) strncpy(parent_name, tname, idx); parent_name[idx] = '\0'; return tname[idx] == '.'; } /* alias for ensuring backwards compatibilty; * use of luaT_fullparentname is preferred. */ int luaT_classmodulename(const char *tname, char *parent_name) { return luaT_fullparentname(tname, parent_name); } /* parent_name must be a buffer at least as big as tname. * If class has a parent, returns true; and, sets * parent name to that of outermost parent (e.g. 
* given class `A.b.c`, sets parent_name to `A`) */ int luaT_outerparentname(const char *tname, char *parent_name) { char chars[] = {'.', '\0'}; size_t idx; idx = strcspn(tname, chars); strncpy(parent_name, tname, idx); parent_name[idx] = '\0'; return tname[idx] == '.'; } /* parent_name must be a buffer at least as big as tname. * If class has a parent, returns true; and, sets parent * name to that of innermost parent (e.g. given class * `A.b.c`, sets parent_name to `b`). In the comments * below, the inner parent name is abbreviated as IPN. */ int luaT_innerparentname(const char *tname, char *parent_name) { int sz = strlen(tname); int tail, head; for(tail = sz-1; tail >= 0 ; tail--) // tail points to if(tname[tail] == '.') break; // just past IPN if (tail == 0) return 0; for(head = tail-1; head >= 0; head--) // head points to if(tname[head] == '.') break; // just before IPN head += 1; // update head to start of IPN tail -= head; // update tail to strlen(IPN) strncpy(parent_name, tname+head, tail); parent_name[tail] = '\0'; return 1; } /* Method for pushing a class's immediate parent to the * stack (e.g. given class `A.b.c`, pushes `b` to the stack) */ void luaT_getinnerparent(lua_State *L, const char *tname) { /* Local variables */ char term[256]; char chars[] = {'.', '\0'}; const char *tname_full = tname; // used for error case /* Get outermost table from Lua */ int n = strcspn(tname, chars); strncpy(term, tname, n); term[n] = '\0'; lua_getglobal(L, term); tname += n + 1; /* Traverse hierarchy down to last table*/ n = strcspn(tname, chars); while(n < strlen(tname)) { /* Check that current parent is a table (i.e. 
a module) */ if(!lua_istable(L, -1)){ strncpy(term, tname_full, tname - tname_full - 1); term[tname - tname_full] = '\0'; luaL_error(L, "while creating metatable %s: bad argument #1 (%s is an invalid module name)", tname_full, term); } strncpy(term, tname, n); term[n] = '\0'; lua_getfield(L, -1, term); lua_remove(L, -2); tname += n + 1; n = strcspn(tname, chars); // prepare for next } /* Check that resulting parent is a table (i.e. a module) */ if(!lua_istable(L, -1)){ strncpy(term, tname_full, tname - tname_full - 1); term[tname - tname_full] = '\0'; luaL_error(L, "while creating metatable %s: bad argument #1 (%s is an invalid module name)", tname_full, term); } } int luaT_lua_newmetatable(lua_State *L) { /* Local Variables */ const char* tname = luaL_checkstring(L, 1); char parent_name[256]; int is_in_module = 0; /* Argument Checking */ lua_settop(L, 6); luaL_argcheck(L, lua_isnoneornil(L, 2) || lua_isstring(L, 2), 2, "parent class name or nil expected"); luaL_argcheck(L, lua_isnoneornil(L, 3) || lua_isfunction(L, 3), 3, "constructor function or nil expected"); luaL_argcheck(L, lua_isnoneornil(L, 4) || lua_isfunction(L, 4), 4, "destructor function or nil expected"); luaL_argcheck(L, lua_isnoneornil(L, 5) || lua_isfunction(L, 5), 5, "factory function or nil expected"); luaL_argcheck(L, lua_isnoneornil(L, 6) || lua_istable(L, 6), 6, "module table or nil expected"); /* Push immediate parent module to stack */ if(lua_isnoneornil(L, 6)) { lua_pop(L, 1); /* remove the nil */ is_in_module = luaT_fullparentname(tname, parent_name); if (is_in_module) luaT_getinnerparent(L, tname); else lua_pushglobaltable(L); } if(!lua_istable(L, -1)) luaL_error(L, "while creating metatable %s: bad argument #1 (%s is an invalid module name)", tname, parent_name); /* we first create the new metaclass if we have to */ if(!luaT_pushmetatable(L, tname)) { /* create the metatable */ lua_newtable(L); /* registry[name] = metatable */ lua_pushvalue(L, -1); lua_setfield(L, LUA_REGISTRYINDEX, 
tname); /* registry[metatable] = tname */ lua_pushvalue(L, -1); lua_pushstring(L, tname); lua_rawset(L, LUA_REGISTRYINDEX); /* __index handling */ lua_pushcfunction(L, luaT_mt__index); lua_setfield(L, -2, "__index"); /* __newindex handling */ lua_pushcfunction(L, luaT_mt__newindex); lua_setfield(L, -2, "__newindex"); /* __typename contains the typename */ lua_pushstring(L, tname); lua_setfield(L, -2, "__typename"); /* __metatable is self */ lua_pushvalue(L, -1); lua_setfield(L, -2, "__metatable"); /* by default, __version equals 1 */ lua_pushnumber(L, 1); lua_setfield(L, -2, "__version"); /* assign default operator functions */ lua_pushcfunction(L, luaT_mt__tostring); lua_setfield(L, -2, "__tostring"); lua_pushcfunction(L, luaT_mt__add); lua_setfield(L, -2, "__add"); lua_pushcfunction(L, luaT_mt__sub); lua_setfield(L, -2, "__sub"); lua_pushcfunction(L, luaT_mt__mul); lua_setfield(L, -2, "__mul"); lua_pushcfunction(L, luaT_mt__div); lua_setfield(L, -2, "__div"); lua_pushcfunction(L, luaT_mt__mod); lua_setfield(L, -2, "__mod"); lua_pushcfunction(L, luaT_mt__pow); lua_setfield(L, -2, "__pow"); lua_pushcfunction(L, luaT_mt__unm); lua_setfield(L, -2, "__unm"); lua_pushcfunction(L, luaT_mt__concat); lua_setfield(L, -2, "__concat"); lua_pushcfunction(L, luaT_mt__len); lua_setfield(L, -2, "__len"); lua_pushcfunction(L, luaT_mt__eq); lua_setfield(L, -2, "__eq"); lua_pushcfunction(L, luaT_mt__lt); lua_setfield(L, -2, "__lt"); lua_pushcfunction(L, luaT_mt__le); lua_setfield(L, -2, "__le"); lua_pushcfunction(L, luaT_mt__call); lua_setfield(L, -2, "__call"); } /* we assign the parent class if necessary */ if(!lua_isnoneornil(L, 2)) { if(lua_getmetatable(L, -1)) luaL_error(L, "class %s has been already assigned a parent class\n", tname); else { const char* parent_tname = luaL_checkstring(L, 2); if(!luaT_pushmetatable(L, parent_tname)) luaL_error(L, "bad argument #2 (invalid parent class name %s)", parent_tname); lua_setmetatable(L, -2); } } /* register the destructor function */ 
if(!lua_isnoneornil(L, 4)) { /* does it exists already? */ lua_pushstring(L, "__gc"); lua_rawget(L, -2); if(lua_isnil(L, -1)) { lua_pop(L, 1); /* pop nil */ lua_pushstring(L, "__gc"); lua_pushvalue(L, 4); lua_rawset(L, -3); } else luaL_error(L, "%s has been already assigned a destructor", tname); } /* register the factory function */ if(!lua_isnoneornil(L, 5)) { /* does it exists already? */ lua_pushstring(L, "__factory"); lua_rawget(L, -2); if(lua_isnil(L, -1)) { lua_pop(L, 1); /* pop nil */ lua_pushstring(L, "__factory"); lua_pushvalue(L, 5); lua_rawset(L, -3); } else luaL_error(L, "%s has been already assigned a factory", tname); } /******** Constructor table and metatable ********/ lua_pushstring(L, "__constructor"); lua_rawget(L, -2); if(lua_isnil(L, -1)) { lua_pop(L, 1); /* pop nil */ lua_newtable(L); /* fancy table */ lua_newtable(L); /* fancy metatable */ lua_pushvalue(L, -3); /* metatable */ lua_setfield(L, -2, "__index"); /* so we can get the methods */ lua_pushcfunction(L, luaT_cmt__newindex); lua_setfield(L, -2, "__newindex"); /* so we add new methods */ lua_pushcfunction(L, luaT_cmt__call); lua_setfield(L, -2, "__call"); /* so we can create, we are here for only that */ lua_pushvalue(L, -3); lua_setfield(L, -2, "__metatable"); /* redirect to metatable with methods */ lua_setmetatable(L, -2); /* constructor metatable is ... this fancy metatable */ /* set metatable[__constructor] = constructor-metatable */ lua_pushstring(L, "__constructor"); lua_pushvalue(L, -2); lua_rawset(L, -4); } /* register the constructor function */ if(!lua_isnoneornil(L, 3)) { /* get constructor metatable */ lua_getmetatable(L, -1); /* does it exists already? 
*/ lua_pushstring(L, "__new"); lua_rawget(L, -2); if(lua_isnil(L, -1)) { lua_pop(L, 1); /* pop nil */ lua_pushstring(L, "__new"); lua_pushvalue(L, 3); lua_rawset(L, -3); /* set "new" in the metatable too */ lua_pushstring(L, "new"); lua_pushvalue(L, 3); lua_rawset(L, -5); } else luaL_error(L, "%s has been already assigned a constructor", tname); /* pop constructor metatable */ lua_pop(L, 1); } /* module.name = constructor metatable */ lua_setfield(L, 6, luaT_classrootname(tname)); return 1; /* returns the metatable */ } /* Lua only utility functions */ /* add any custom type, provided the object has a metatable */ int luaT_lua_metatype(lua_State *L) { if( (lua_gettop(L) != 2) && (lua_gettop(L) != 3) ) luaL_error(L, "expecting: string table [ctype]"); luaL_checkstring(L, 1); luaL_checktype(L, 2, LUA_TTABLE); if(lua_gettop(L) == 3) { if(!luaT_cdataname(L, 3, lua_tostring(L, 1))) luaL_error(L, "could not register cdata type -- missing ffi library?"); } /* registry[name] = metatable */ lua_pushvalue(L, 1); lua_pushvalue(L, 2); lua_rawset(L, LUA_REGISTRYINDEX); /* registry[metatable] = tname */ lua_pushvalue(L, 2); lua_pushvalue(L, 1); lua_rawset(L, LUA_REGISTRYINDEX); return 0; } /* return a userdata from a C pointer */ /* you are better to know what you are doing */ int luaT_lua_pushudata(lua_State *L) { void *udata = NULL; const char *tname = luaL_checkstring(L, 2); if(lua_type(L, 1) == 10) udata = *((void**)lua_topointer(L, 1)); else if(luaT_iscdata(L, 1)) udata = ((void**)lua_topointer(L, 1))[4]; else if(lua_isnumber(L, 1)) udata = (void*)(uintptr_t)lua_tonumber(L, 1); else luaL_argerror(L, 1, "expecting number or cdata"); luaT_pushudata(L, udata, tname); return 1; } int luaT_lua_factory(lua_State *L) { const char* tname = luaL_checkstring(L, 1); if(luaT_pushmetatable(L, tname) && !lua_isnil(L, -1)) { lua_pushstring(L, "__factory"); lua_rawget(L, -2); } else { lua_pushnil(L); } return 1; } int luaT_lua_getconstructortable(lua_State *L) { const char* tname = 
luaL_checkstring(L, 1); if(luaT_pushmetatable(L, tname)) { lua_pushstring(L, "__constructor"); lua_rawget(L, -2); return 1; } return 0; } int luaT_lua_typename(lua_State *L) { const char* tname = NULL; luaL_checkany(L, 1); if((tname = luaT_typename(L, 1))) { lua_pushstring(L, tname); return 1; } return 0; } int luaT_lua_isequal(lua_State *L) { if(lua_isuserdata(L, 1) && lua_isuserdata(L, 2)) { void **u1, **u2; luaL_argcheck(L, luaT_typename(L, 1), 1, "Torch object expected"); luaL_argcheck(L, luaT_typename(L, 2), 2, "Torch object expected"); u1 = lua_touserdata(L, 1); u2 = lua_touserdata(L, 2); if(*u1 == *u2) lua_pushboolean(L, 1); else lua_pushboolean(L, 0); } else if(lua_istable(L, 1) && lua_istable(L, 2)) lua_pushboolean(L, lua_rawequal(L, 1, 2)); else lua_pushboolean(L, 0); return 1; } static void luaT_pushpointer(lua_State *L, const void *ptr) { #if LUA_VERSION_NUM >= 503 // this assumes that lua_Integer is a ptrdiff_t if (sizeof(void *) > sizeof(lua_Integer)) luaL_error(L, "Pointer value can't be represented as a Lua integer (an overflow would occur)"); lua_pushinteger(L, (uintptr_t)(ptr)); #else // 2^53 - this assumes that lua_Number is a double if ((uintptr_t)ptr > 9007199254740992LLU) luaL_error(L, "Pointer value can't be represented as a Lua number (an overflow would occur)"); lua_pushnumber(L, (uintptr_t)(ptr)); #endif } int luaT_lua_pointer(lua_State *L) { if(lua_type(L, 1) == 10) /* luajit cdata */ { /* we want the pointer holded by cdata */ /* not the pointer on the cdata object */ const void* ptr = *((void**)lua_topointer(L, 1)); luaT_pushpointer(L, ptr); return 1; } else if (luaT_iscdata(L, 1)) /* luaffi cdata */ { void** ptr = (void**)lua_touserdata(L, 1); luaT_pushpointer(L, ptr[4]); return 1; } else if(lua_isuserdata(L, 1)) { void **ptr; luaL_argcheck(L, luaT_typename(L, 1), 1, "Torch object expected"); ptr = lua_touserdata(L, 1); luaT_pushpointer(L, *ptr); return 1; } else if(lua_istable(L, 1) || lua_isthread(L, 1) || lua_isfunction(L, 1)) { 
const void* ptr = lua_topointer(L, 1); luaT_pushpointer(L, ptr); return 1; } else if(lua_isstring(L, 1)) { const char* ptr = lua_tostring(L, 1); luaT_pushpointer(L, ptr); return 1; } else luaL_error(L, "Torch object, table, thread, cdata or function expected"); return 0; } int luaT_lua_setenv(lua_State *L) { if(!lua_isfunction(L, 1) && !lua_isuserdata(L, 1)) luaL_typerror(L, 1, "function or userdata"); luaL_checktype(L, 2, LUA_TTABLE); lua_setuservalue(L, 1); return 0; } int luaT_lua_getenv(lua_State *L) { if(!lua_isfunction(L, 1) && !lua_isuserdata(L, 1)) luaL_typerror(L, 1, "function or userdata"); lua_getuservalue(L, 1); if (lua_isnil(L, -1)) lua_newtable(L); return 1; } int luaT_lua_getmetatable(lua_State *L) { const char *tname = luaL_checkstring(L, 1); if(luaT_pushmetatable(L, tname)) return 1; return 0; } int luaT_lua_version(lua_State *L) { luaL_checkany(L, 1); if(luaT_iscdata(L, 1)) { const char *tname = luaT_cdataname(L, 1, NULL); if(tname) { luaT_pushmetatable(L, tname); lua_pushstring(L, "__version"); lua_rawget(L, -2); return 1; } return 0; } else if(lua_getmetatable(L, 1)) { lua_pushstring(L, "__version"); lua_rawget(L, -2); return 1; } return 0; } int luaT_lua_setmetatable(lua_State *L) { const char *tname = luaL_checkstring(L, 2); luaL_checktype(L, 1, LUA_TTABLE); if(!luaT_pushmetatable(L, tname)) luaL_error(L, "unknown typename %s\n", tname); lua_setmetatable(L, 1); return 1; } /* metatable operator methods */ static int luaT_mt__index(lua_State *L) { if(!lua_getmetatable(L, 1)) luaL_error(L, "critical internal indexing error: no metatable found"); if(!lua_istable(L, -1)) luaL_error(L, "critical internal indexing error: not a metatable"); /* test for __index__ method first */ lua_getfield(L, -1, "__index__"); if(!lua_isnil(L, -1)) { int result; if(!lua_isfunction(L, -1)) luaL_error(L, "critical internal indexing error: __index__ is not a function"); lua_pushvalue(L, 1); lua_pushvalue(L, 2); lua_call(L, 2, LUA_MULTRET); /* DEBUG: risque: faut 
vraiment retourner 1 ou 2 valeurs... */ result = lua_toboolean(L, -1); lua_pop(L, 1); if(result) return 1; /* on the stack: 1. the object 2. the value 3. the metatable */ /* apparently, __index wants only one element returned */ /* return lua_gettop(L)-3; */ } else lua_pop(L, 1); /* remove nil __index__ on the stack */ lua_pushvalue(L, 2); lua_gettable(L, -2); return 1; } static int luaT_mt__newindex(lua_State *L) { if(!lua_getmetatable(L, 1)) luaL_error(L, "critical internal indexing error: no metatable found"); if(!lua_istable(L, -1)) luaL_error(L, "critical internal indexing error: not a metatable"); /* test for __newindex__ method first */ lua_getfield(L, -1, "__newindex__"); if(!lua_isnil(L, -1)) { int result; if(!lua_isfunction(L, -1)) luaL_error(L, "critical internal indexing error: __newindex__ is not a function"); lua_pushvalue(L, 1); lua_pushvalue(L, 2); lua_pushvalue(L, 3); lua_call(L, 3, 1); /* DEBUG: risque: faut vraiment retourner qqch */ result = lua_toboolean(L, -1); lua_pop(L, 1); if(result) return 0; } else lua_pop(L, 1); /* remove nil __newindex__ on the stack */ lua_pop(L, 1); /* pop the metatable */ if(lua_istable(L, 1)) lua_rawset(L, 1); else luaL_error(L, "the class %s cannot be indexed", luaT_typename(L, 1)); return 0; } #define MT_UNI_OPERATOR_GET_HANDLER(NAME) \ if(!lua_getmetatable(L, 1)) \ luaL_error(L, "internal error in __" #NAME ": no metatable"); #define MT_BIN_OPERATOR_GET_HANDLER(NAME) \ if(!lua_getmetatable(L, 1) && !lua_getmetatable(L,2) ) \ luaL_error(L, "internal error in __" #NAME \ ": no metatable in both operands"); #define MT_DECLARE_OPERATOR_BODY(NAME, NIL_BEHAVIOR) \ \ lua_getfield(L, -1, "__" #NAME "__"); \ if(lua_isnil(L, -1)) \ { \ NIL_BEHAVIOR; \ } \ else \ { \ if(lua_isfunction(L, -1)) \ { \ lua_insert(L, 1); /* insert function */ \ lua_pop(L, 1); /* remove metatable */ \ lua_call(L, lua_gettop(L)-1, LUA_MULTRET); \ /* we return the result of the call */ \ return lua_gettop(L); \ } \ /* we return the thing the user 
left in __tostring__ */ \ } \ return 0; \ /* note: check dans metatable pour ca, donc necessaire */ #define MT_DECLARE_OPERATOR(NAME, NIL_BEHAVIOR) \ int luaT_mt__##NAME(lua_State *L) \ { \ MT_UNI_OPERATOR_GET_HANDLER(NAME) \ MT_DECLARE_OPERATOR_BODY(NAME,NIL_BEHAVIOR) \ } #define MT_DECLARE_BIN_OPERATOR(NAME, NIL_BEHAVIOR) \ int luaT_mt__##NAME(lua_State *L) \ { \ MT_BIN_OPERATOR_GET_HANDLER(NAME) \ MT_DECLARE_OPERATOR_BODY(NAME,NIL_BEHAVIOR) \ } #define BIN_OPERATOR_ERROR(NAME) \ luaL_error(L, "both %s and %s have no " #NAME " operator", \ luaT_typename(L, 1), luaT_typename(L,2)) MT_DECLARE_BIN_OPERATOR(add, BIN_OPERATOR_ERROR(addition) ) MT_DECLARE_BIN_OPERATOR(sub, BIN_OPERATOR_ERROR(substraction) ) MT_DECLARE_BIN_OPERATOR(mul, BIN_OPERATOR_ERROR(multiplication) ) MT_DECLARE_BIN_OPERATOR(div, BIN_OPERATOR_ERROR(division) ) MT_DECLARE_BIN_OPERATOR(mod, BIN_OPERATOR_ERROR(modulo) ) MT_DECLARE_BIN_OPERATOR(pow, BIN_OPERATOR_ERROR(power) ) MT_DECLARE_BIN_OPERATOR(concat, BIN_OPERATOR_ERROR(concat) ) MT_DECLARE_BIN_OPERATOR(eq, lua_settop(L, 2); lua_pushcfunction(L, luaT_lua_isequal); lua_insert(L, 1); lua_call(L, 2, 1); return 1;) MT_DECLARE_BIN_OPERATOR(lt, BIN_OPERATOR_ERROR(less-than) ) MT_DECLARE_BIN_OPERATOR(le, BIN_OPERATOR_ERROR(less-equal) ) MT_DECLARE_OPERATOR(tostring, lua_pushstring(L, luaT_typename(L, 1)); return 1;) MT_DECLARE_OPERATOR(call, luaL_error(L, "%s has no call operator", luaT_typename(L, 1))) MT_DECLARE_OPERATOR(unm, luaL_error(L, "%s has no negation operator", luaT_typename(L, 1))) MT_DECLARE_OPERATOR(len, luaL_error(L, "%s has no length operator", luaT_typename(L, 1))) /* constructor metatable methods */ int luaT_cmt__call(lua_State *L) { if(!lua_istable(L, 1)) luaL_error(L, "internal error in __call: not a constructor table"); if(!lua_getmetatable(L, 1)) luaL_error(L, "internal error in __call: no metatable available"); lua_pushstring(L, "__new"); lua_rawget(L, -2); if(lua_isnil(L, -1)) luaL_error(L, "no constructor available"); 
lua_remove(L, 1); /* remove constructor atable */ lua_insert(L, 1); /* insert constructor */ lua_pop(L, 1); /* remove fancy metatable */ lua_call(L, lua_gettop(L)-1, LUA_MULTRET); return lua_gettop(L); } int luaT_cmt__newindex(lua_State *L) { if(!lua_istable(L, 1)) luaL_error(L, "internal error in __newindex: not a constructor table"); if(!lua_getmetatable(L, 1)) luaL_error(L, "internal error in __newindex: no metatable available"); lua_pushstring(L, "__metatable"); lua_rawget(L, -2); if(!lua_istable(L, -1)) luaL_error(L, "internal error in __newindex: no metaclass available"); lua_insert(L, 2); lua_pop(L, 1); /* remove the metatable over the constructor table */ lua_rawset(L, -3); return 0; } /******************** deprecated functions ********************/ int luaT_pushmetaclass(lua_State *L, const char *tname) { return luaT_pushmetatable(L, tname); } const char* luaT_id(lua_State *L, int ud) { return luaT_typename(L, ud); } const char* luaT_id2typename(lua_State *L, const char *id) { return id; } const char* luaT_typename2id(lua_State *L, const char *tname) { return luaT_typenameid(L, tname); } int luaT_getmetaclass(lua_State *L, int index) { return lua_getmetatable(L, index); } const char* luaT_checktypename2id(lua_State *L, const char *tname) { const char* id = luaT_typenameid(L, tname); if(!id) luaL_error(L, "unknown class <%s>", tname); return id; } void luaT_registeratid(lua_State *L, const struct luaL_Reg *methods, const char *id) { luaT_registeratname(L, methods, id); } /**************************************************************/ lib/luaT/luaT.h000066400000000000000000000124701316246254300136210ustar00rootroot00000000000000#ifndef LUAT_UTILS_INC #define LUAT_UTILS_INC #ifdef __cplusplus extern "C" { #endif #include #include #ifdef __cplusplus } #endif #ifndef LUA_EXTERNC # ifdef __cplusplus # define LUA_EXTERNC extern "C" # else # define LUA_EXTERNC extern # endif #endif #if (defined(_MSC_VER) || defined(__MINGW32__)) # define DLL_EXPORT 
__declspec(dllexport) # define DLL_IMPORT __declspec(dllimport) # ifdef luaT_EXPORTS # define LUAT_API LUA_EXTERNC DLL_EXPORT # else # define LUAT_API LUA_EXTERNC DLL_IMPORT # endif #else # define DLL_EXPORT # define DLL_IMPORT # define LUAT_API LUA_EXTERNC #endif #if LUA_VERSION_NUM == 501 # define lua_pushglobaltable(L) lua_pushvalue(L, LUA_GLOBALSINDEX) # define lua_setuservalue lua_setfenv # define lua_getuservalue lua_getfenv #else # define lua_objlen lua_rawlen static int luaL_typerror(lua_State *L, int narg, const char *tname) { return luaL_error(L, "%s expected, got %s", tname, luaL_typename(L, narg)); } #endif /* C functions */ LUAT_API void* luaT_alloc(lua_State *L, ptrdiff_t size); LUAT_API void* luaT_realloc(lua_State *L, void *ptr, ptrdiff_t size); LUAT_API void luaT_free(lua_State *L, void *ptr); LUAT_API void luaT_setfuncs(lua_State *L, const luaL_Reg *l, int nup); LUAT_API const char* luaT_newlocalmetatable(lua_State *L, const char *tname, const char *parent_tname, lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory, int moduleidx); LUAT_API const char* luaT_newmetatable(lua_State *L, const char *tname, const char *parenttname, lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory); LUAT_API int luaT_pushmetatable(lua_State *L, const char *tname); LUAT_API const char* luaT_typenameid(lua_State *L, const char *tname); LUAT_API const char* luaT_typename(lua_State *L, int ud); LUAT_API void luaT_pushudata(lua_State *L, void *udata, const char *tname); LUAT_API void *luaT_toudata(lua_State *L, int ud, const char *tname); LUAT_API int luaT_isudata(lua_State *L, int ud, const char *tname); LUAT_API void *luaT_checkudata(lua_State *L, int ud, const char *tname); LUAT_API void luaT_pushlong(lua_State *L, long n); LUAT_API long luaT_checklong(lua_State *L, int idx); LUAT_API long luaT_tolong(lua_State *L, int idx); LUAT_API void luaT_pushinteger(lua_State *L, ptrdiff_t n); LUAT_API ptrdiff_t 
luaT_checkinteger(lua_State *L, int idx); LUAT_API void *luaT_getfieldcheckudata(lua_State *L, int ud, const char *field, const char *tname); LUAT_API void *luaT_getfieldchecklightudata(lua_State *L, int ud, const char *field); LUAT_API double luaT_getfieldchecknumber(lua_State *L, int ud, const char *field); LUAT_API int luaT_getfieldcheckint(lua_State *L, int ud, const char *field); LUAT_API const char* luaT_getfieldcheckstring(lua_State *L, int ud, const char *field); LUAT_API int luaT_getfieldcheckboolean(lua_State *L, int ud, const char *field); LUAT_API void luaT_getfieldchecktable(lua_State *L, int ud, const char *field); LUAT_API int luaT_typerror(lua_State *L, int ud, const char *tname); LUAT_API int luaT_checkboolean(lua_State *L, int ud); LUAT_API int luaT_optboolean(lua_State *L, int ud, int def); LUAT_API void luaT_registeratname(lua_State *L, const struct luaL_Reg *methods, const char *name); /* utility functions */ LUAT_API const char *luaT_classrootname(const char *tname); LUAT_API int luaT_classmodulename(const char *tname, char *module_name); /* debug */ LUAT_API void luaT_stackdump(lua_State *L); /* Lua functions */ LUAT_API int luaT_lua_newmetatable(lua_State *L); LUAT_API int luaT_lua_factory(lua_State *L); LUAT_API int luaT_lua_getconstructortable(lua_State *L); LUAT_API int luaT_lua_typename(lua_State *L); LUAT_API int luaT_lua_isequal(lua_State *L); LUAT_API int luaT_lua_pointer(lua_State *L); LUAT_API int luaT_lua_setenv(lua_State *L); LUAT_API int luaT_lua_getenv(lua_State *L); LUAT_API int luaT_lua_getmetatable(lua_State *L); LUAT_API int luaT_lua_version(lua_State *L); LUAT_API int luaT_lua_setmetatable(lua_State *L); LUAT_API int luaT_lua_metatype(lua_State *L); LUAT_API int luaT_lua_pushudata(lua_State *L); /* deprecated functions */ /* ids have been replaced by string names to identify classes */ /* comments show what function (that you should use) they call now */ #if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) #define 
LUAT_DEPRECATED __attribute__((__deprecated__)) #elif (defined(_MSC_VER) || defined(__MINGW32__)) #define LUAT_DEPRECATED __declspec(deprecated) #else #define LUAT_DEPRECATED #endif LUAT_API LUAT_DEPRECATED int luaT_pushmetaclass(lua_State *L, const char *tname); /* same as luaT_pushmetatable */ LUAT_API LUAT_DEPRECATED const char* luaT_id(lua_State *L, int ud); /* same as luaT_typename */ LUAT_API LUAT_DEPRECATED const char* luaT_id2typename(lua_State *L, const char *id); /* same as luaT_typenameid */ LUAT_API LUAT_DEPRECATED const char* luaT_typename2id(lua_State *L, const char*); /* same as luaT_typenameid */ LUAT_API LUAT_DEPRECATED int luaT_getmetaclass(lua_State *L, int index); /* same as luaT_getmetatable */ LUAT_API LUAT_DEPRECATED const char* luaT_checktypename2id(lua_State *L, const char *tname); /* same as luaT_typenameid */ LUAT_API LUAT_DEPRECATED void luaT_registeratid(lua_State *L, const struct luaL_Reg *methods, const char *id); /* same as luaT_registeratname */ #endif lib/luaT/luaTConfig.cmake.in000066400000000000000000000004251316246254300162020ustar00rootroot00000000000000# Find the luaT includes and library # # LUAT_INCLUDE_DIR -- where to find the includes # LUAT_LIBRARIES -- list of libraries to link against # LUAT_FOUND -- set to 1 if found SET(LUAT_FOUND 1) SET(LUAT_INCLUDE_DIR "@LUAT_INCLUDE_DIR@") SET(LUAT_LIBRARIES "@LUAT_LIBRARIES@") mkdocs.yml000066400000000000000000000013351316246254300130710ustar00rootroot00000000000000site_name: torch7 theme : simplex repo_url : https://github.com/torch/torch7 use_directory_urls : false markdown_extensions: [extra] docs_dir : doc pages: - [index.md, Home] - [tensor.md, Tensor Library, Tensor] - [maths.md, Tensor Library, Tensor Math] - [storage.md, Tensor Library, Storage] - [file.md, File I/O Library, File Interface] - [diskfile.md, File I/O Library, Disk File] - [memoryfile.md, File I/O Library, Memory File] - [pipefile.md, File I/O Library, Pipe File] - [serialization.md, File I/O Library, 
Serialization] - [utility.md, Useful Utilities, Class] - [timer.md, Useful Utilities, Timer] - [tester.md, Useful Utilities, Tester] - [cmdline.md, Useful Utilities, CmdLine] - [random.md, Useful Utilities, Random] paths.lua.in000066400000000000000000000005521316246254300133150ustar00rootroot00000000000000local paths = {} paths.install_prefix = [[@Torch_INSTALL_PREFIX@]] paths.install_bin = [[@Torch_INSTALL_BIN@]] paths.install_man = [[@Torch_INSTALL_MAN@]] paths.install_lib = [[@Torch_INSTALL_LIB@]] paths.install_share = [[@Torch_INSTALL_SHARE@]] paths.install_include = [[@Torch_INSTALL_INCLUDE@]] paths.install_cmake = [[@Torch_INSTALL_CMAKE@]] return paths random.lua000066400000000000000000000025241316246254300130520ustar00rootroot00000000000000local wrap = require 'cwrap' require 'torchcwrap' local interface = wrap.CInterface.new() interface:print( [[ #include "luaT.h" #include "TH.h" extern void torch_Generator_init(lua_State *L); extern void torch_Generator_new(lua_State *L); ]]) for _,name in ipairs({"seed", "initialSeed"}) do interface:wrap(name, string.format("THRandom_%s",name), {{name='Generator', default=true}, {name="long", creturned=true}}) end interface:wrap('manualSeed', 'THRandom_manualSeed', {{name='Generator', default=true}, {name="long"}}) interface:wrap('getRNGState', 'THByteTensor_getRNGState', {{name='Generator', default=true}, {name='ByteTensor',default=true,returned=true,method={default='nil'}} }) interface:wrap('setRNGState', 'THByteTensor_setRNGState', {{name='Generator', default=true}, {name='ByteTensor',default=true,returned=true,method={default='nil'}} }) interface:register("random__") interface:print( [[ void torch_random_init(lua_State *L) { torch_Generator_init(L); torch_Generator_new(L); lua_setfield(L, -2, "_gen"); luaT_setfuncs(L, random__, 0); } ]]) interface:tofile(arg[1]) 
rocks/000077500000000000000000000000001316246254300122055ustar00rootroot00000000000000rocks/torch-scm-1.rockspec000066400000000000000000000021201316246254300157700ustar00rootroot00000000000000package = "torch" version = "scm-1" source = { url = "git://github.com/torch/torch7.git", } description = { summary = "Torch7", detailed = [[ ]], homepage = "https://github.com/torch/torch7", license = "BSD" } dependencies = { "lua >= 5.1", "paths >= 1.0", "cwrap >= 1.0" } build = { type = "command", build_command = [[ cmake -E make_directory build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DLUA=$(LUA) -DLUALIB=$(LUALIB) -DLUA_BINDIR="$(LUA_BINDIR)" -DLUA_INCDIR="$(LUA_INCDIR)" -DLUA_LIBDIR="$(LUA_LIBDIR)" -DLUADIR="$(LUADIR)" -DLIBDIR="$(LIBDIR)" -DCMAKE_INSTALL_PREFIX="$(PREFIX)" && $(MAKE) -j$(getconf _NPROCESSORS_ONLN) ]], platforms = { windows = { build_command = [[ cmake -E make_directory build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DLUA=$(LUA) -DLUALIB=$(LUALIB) -DLUA_BINDIR="$(LUA_BINDIR)" -DLUA_INCDIR="$(LUA_INCDIR)" -DLUA_LIBDIR="$(LUA_LIBDIR)" -DLUADIR="$(LUADIR)" -DLIBDIR="$(LIBDIR)" -DCMAKE_INSTALL_PREFIX="$(PREFIX)" && $(MAKE) ]] } }, install_command = "cd build && $(MAKE) install" } test/000077500000000000000000000000001316246254300120435ustar00rootroot00000000000000test/longSize.lua000066400000000000000000000023211316246254300143360ustar00rootroot00000000000000require 'torch' local tester = torch.Tester() local tests = torch.TestSuite() local tensor = torch.rand(2,3) function tests.diskFileLongSize8() f = torch.DiskFile('tensor8.bin','w') f:binary() f:longSize(8) f:writeObject(tensor) f:close() f = torch.DiskFile('tensor8.bin','r') f:binary() f:longSize(8) tensor2 = f:readObject() f:close() tester:assert(tensor:norm()==tensor2:norm()) os.remove('tensor8.bin') end function tests.diskFileLongSize4() f = torch.DiskFile('tensor4.bin','w') f:binary() f:longSize(4) f:writeObject(tensor) f:close() f = torch.DiskFile('tensor4.bin','r') f:binary() 
f:longSize(4) tensor2 = f:readObject() f:close() tester:assert(tensor:norm()==tensor2:norm()) os.remove('tensor4.bin') end function tests.memoryFileLongSize8() f = torch.MemoryFile() f:binary() f:longSize(8) f:writeObject(tensor) f:seek(1) tensor2 = f:readObject() f:close() tester:assert(tensor:norm()==tensor2:norm()) end function tests.memoryFileLongSize4() f = torch.MemoryFile() f:binary() f:longSize(4) f:writeObject(tensor) f:seek(1) tensor2 = f:readObject() f:close() tester:assert(tensor:norm()==tensor2:norm()) end tester:add(tests) tester:run() test/test.lua000066400000000000000000004201721316246254300135330ustar00rootroot00000000000000--require 'torch' local mytester local torchtest = torch.TestSuite() local msize = 100 local precision -- Lua 5.2 compatibility local loadstring = loadstring or load local unpack = unpack or table.unpack local function maxdiff(x,y) local d = x-y if x:type() == 'torch.DoubleTensor' or x:type() == 'torch.FloatTensor' then return d:abs():max() else local dd = torch.Tensor():resize(d:size()):copy(d) return dd:abs():max() end end -- workarounds for non-existant functions function torch.HalfTensor:__sub(other) return (self:real() - other:real()):half() end function torch.HalfTensor:mean(dim) return self:real():mean(dim):half() end function torch.HalfTensor:abs() return self:real():abs():half() end function torch.HalfTensor:max() return self:real():max() end function torch.HalfTensor:add(a, b) return (self:real():add(a, b:real())):half() end function torch.HalfTensor:reshape(a, b) return (self:real():reshape(a, b)):half() end function torch.HalfTensor:fill(a) return self:real():fill(a):half() end function torchtest.dot() local types = { ['torch.DoubleTensor'] = 1e-8, -- for ddot ['torch.FloatTensor'] = 1e-4, -- for sdot } for tname, prec in pairs(types) do local v1 = torch.randn(100):type(tname) local v2 = torch.randn(100):type(tname) local res1 = torch.dot(v1,v2) local res2 = 0 for i = 1,v1:size(1) do res2 = res2 + v1[i] * v2[i] end 
local err = math.abs(res1-res2) mytester:assertlt(err, prec, 'error in torch.dot (' .. tname .. ')') end end local genericSingleOpTest = [[ -- [res] torch.functionname([res,] x) -- contiguous local m1 = torch.randn(100,100) local res1 = torch.functionname(m1[{ 4,{} }]) local res2 = res1:clone():zero() for i = 1,res1:size(1) do res2[i] = math.functionname(m1[4][i]) end local err = res1:clone():zero() -- find absolute error for i = 1, res1:size(1) do err[i] = math.abs(res1[i] - res2[i]) end -- find maximum element of error local maxerrc = 0 for i = 1, err:size(1) do if err[i] > maxerrc then maxerrc = err[i] end end -- non-contiguous local m1 = torch.randn(100,100) local res1 = torch.functionname(m1[{ {}, 4 }]) local res2 = res1:clone():zero() for i = 1,res1:size(1) do res2[i] = math.functionname(m1[i][4]) end local err = res1:clone():zero() -- find absolute error for i = 1, res1:size(1) do err[i] = math.abs(res1[i] - res2[i]) end -- find maximum element of error local maxerrnc = 0 for i = 1, err:size(1) do if err[i] > maxerrnc then maxerrnc = err[i] end end return maxerrc, maxerrnc --]] function torchtest.sin() local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'sin')) local maxerrc, maxerrnc = f() mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous') mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous') end function torchtest.sinh() local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'sinh')) local maxerrc, maxerrnc = f() mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous') mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous') end function torchtest.asin() local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'asin')) local maxerrc, maxerrnc = f() mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous') mytester:assertlt(maxerrnc, precision, 'error in 
torch.functionname - non-contiguous') end function torchtest.cos() local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'cos')) local maxerrc, maxerrnc = f() mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous') mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous') end function torchtest.cosh() local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'cosh')) local maxerrc, maxerrnc = f() mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous') mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous') end function torchtest.acos() local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'acos')) local maxerrc, maxerrnc = f() mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous') mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous') end function torchtest.tan() local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'tan')) local maxerrc, maxerrnc = f() mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous') mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous') end function torchtest.tanh() local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'tanh')) local maxerrc, maxerrnc = f() mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous') mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous') end function torchtest.atan() local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'atan')) local maxerrc, maxerrnc = f() mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous') mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous') end function torchtest.log() local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'log')) local 
maxerrc, maxerrnc = f() mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous') mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous') end function torchtest.sqrt() local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'sqrt')) local maxerrc, maxerrnc = f() mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous') mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous') end function torchtest.rsqrt() local function TH_rsqrt(x) return 1 / math.sqrt(x) end local f local t = genericSingleOpTest:gsub('functionname', 'rsqrt'):gsub('math.rsqrt', 'TH_rsqrt') local env = { TH_rsqrt=TH_rsqrt, torch=torch, math=math } if not setfenv then -- Lua 5.2 f = load(t, 'test', 't', env) else f = loadstring(t) setfenv(f, env) end local maxerrc, maxerrnc = f() mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous') mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous') end function torchtest.sigmoid() -- can't use genericSingleOpTest, since `math.sigmoid` doesn't exist, have to use -- `torch.sigmoid` instead local inputValues = {-1000,-1,0,0.5,1,2,1000} local expectedOutput = {0.0000, 0.2689, 0.5, 0.6225, 0.7311, 0.8808, 1.000} local precision_4dps = 0.0002 -- float local inputFT = torch.FloatTensor(inputValues) local expectedFT = torch.FloatTensor(expectedOutput) mytester:assertlt((torch.sigmoid(inputFT) - expectedFT):abs():max(), precision_4dps, 'error in torch.sigmoid - single') mytester:assertlt((inputFT - torch.FloatTensor(inputValues)):abs():max(), precision_4dps, 'error in torch.sigmoid - single') local sigmoidFT = torch.FloatTensor(inputValues):sigmoid() mytester:assertlt((sigmoidFT - expectedFT):abs():max(), precision_4dps, 'error in torch.sigmoid - single') -- double local inputDT = torch.DoubleTensor(inputValues) local expectedDT = torch.DoubleTensor(expectedOutput) 
mytester:assertlt((torch.sigmoid(inputDT) - expectedDT):abs():max(), precision_4dps, 'error in torch.sigmoid - double') mytester:assertlt((inputDT - torch.DoubleTensor(inputValues)):abs():max(), precision_4dps, 'error in torch.sigmoid - double') local sigmoidDT = torch.DoubleTensor(inputValues):sigmoid() mytester:assertlt((sigmoidDT - expectedDT):abs():max(), precision_4dps, 'error in torch.sigmoid - double') end function torchtest.exp() local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'exp')) local maxerrc, maxerrnc = f() mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous') mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous') end function torchtest.floor() local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'floor')) local maxerrc, maxerrnc = f() mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous') mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous') end function torchtest.ceil() local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'ceil')) local maxerrc, maxerrnc = f() mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous') mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous') end function torchtest.frac() local function TH_frac(x) return math.fmod(x, 1) end local f local t = genericSingleOpTest:gsub('functionname', 'frac'):gsub('math.frac', 'TH_frac') local env = { TH_frac=TH_frac, torch=torch, math=math } if not setfenv then -- Lua 5.2 f = load(t, 'test', 't', env) else f = loadstring(t) setfenv(f, env) end local maxerrc, maxerrnc = f() mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous') mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous') end function torchtest.trunc() local function TH_trunc(x) return x - math.fmod(x, 1) end local f local t = 
genericSingleOpTest:gsub('functionname', 'trunc'):gsub('math.trunc', 'TH_trunc') local env = { TH_trunc=TH_trunc, torch=torch, math=math } if not setfenv then -- Lua 5.2 f = load(t, 'test', 't', env) else f = loadstring(t) setfenv(f, env) end local maxerrc, maxerrnc = f() mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous') mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous') end function torchtest.round() -- [res] torch.round([res,] x) -- contiguous local m1 = torch.randn(100,100) local res1 = torch.round(m1[{ 4,{} }]) local res2 = res1:clone():zero() for i = 1,res1:size(1) do res2[i] = math.floor(m1[4][i]+0.5) end local err = res1:clone():zero() -- find absolute error for i = 1, res1:size(1) do err[i] = math.abs(res1[i] - res2[i]) end -- find maximum element of error local maxerrc = 0 for i = 1, err:size(1) do if err[i] > maxerrc then maxerrc = err[i] end end mytester:assertlt(maxerrc, precision, 'error in torch.round - contiguous') -- non-contiguous local m1 = torch.randn(100,100) local res1 = torch.round(m1[{ {}, 4 }]) local res2 = res1:clone():zero() for i = 1,res1:size(1) do res2[i] = math.floor(m1[i][4]+0.5) end local err = res1:clone():zero() -- find absolute error for i = 1, res1:size(1) do err[i] = math.abs(res1[i] - res2[i]) end -- find maximum element of error local maxerrnc = 0 for i = 1, err:size(1) do if err[i] > maxerrnc then maxerrnc = err[i] end end mytester:assertlt(maxerrnc, precision, 'error in torch.round - non-contiguous') end function torchtest.max() -- torch.max([resval, resind,] x [,dim]) -- TH_TENSOR_BASE local m1 = torch.Tensor(8,2):fill(3):select(2, 1) local resval, resind = torch.max(m1, 1) mytester:assert(resind[1] == 1) -- torch.max( x ) -- contiguous local m1 = torch.randn(100,100) local res1 = torch.max(m1) local res2 = m1[1][1] for i = 1,m1:size(1) do for j = 1,m1:size(2) do if m1[i][j] > res2 then res2 = m1[i][j] end end end local err = res1 - res2 
mytester:assertlt(err, precision, 'error in torch.max - contiguous') -- non-contiguous local m1 = torch.randn(10,10,10) local m2 = m1[{{}, 4, {}}] local res1 = torch.max(m2) local res2 = m2[1][1] for i = 1,m2:size(1) do for j = 1,m2:size(2) do if m2[i][j] > res2 then res2 = m2[i][j] end end end local err = res1 - res2 mytester:assertlt(err, precision, 'error in torch.max - non-contiguous') -- torch.max([resval, resind,] x ,dim]) function lua_max(t, dim) assert(t:nDimension() == 2) max_val = t:narrow(dim, 1, 1):clone() max_ind = t:narrow(dim, 1, 1):clone():long():fill(1) other = 3 - dim for i = 1, t:size(other) do for j = 1, t:size(dim) do val = t:select(other, i):select(dim, j) max = max_val:select(other, i):select(dim, 1) if val > max then max_val:select(other, i):fill(val) max_ind:select(other, i):fill(j) end end end return max_val, max_ind end local m1 = torch.randn(100,100) for dim = 1,2 do local res1val, res1ind = torch.max(m1, dim) local res2val, res2ind = lua_max(m1, dim) mytester:asserteq((res1val-res2val):abs():max(), 0, 'error in torch.max') mytester:asserteq((res1ind-res2ind):abs():max(), 0, 'error in torch.max') end -- NaNs for index in pairs{1, 5, 100} do local m1 = torch.randn(100) m1[index] = 0/0 local res1val, res1ind = torch.max(m1, 1) mytester:assert(res1val[1] ~= res1val[1], 'error in torch.max (value) - NaNs') mytester:assert(res1ind[1] == index, 'error in torch.max (index) - NaNs') local res1val = torch.max(m1) mytester:assert(res1val ~= res1val, 'error in torch.max - NaNs') end -- dim == nDim -1 local a = torch.Tensor({{1,2},{3,4}}):select(2, 1) local aval, aind = torch.max(a, 1) mytester:assert(aval[1] == 3) mytester:assert(aind[1] == 2) local b = torch.Tensor({{{1,2},{3,4}},{{5,6},{7,8}}}):select(3, 1) local bval, bind = torch.max(b, 2) mytester:assert(bval[1][1] == 3) mytester:assert(bind[1][1] == 2) mytester:assert(bval[2][1] == 7) mytester:assert(bind[2][1] == 2) end function torchtest.min() -- torch.min([resval, resind,] x [,dim]) -- 
torch.min( x ) -- contiguous local m1 = torch.randn(100,100) local res1 = torch.min(m1) local res2 = m1[1][1] for i = 1,m1:size(1) do for j = 1,m1:size(2) do if m1[i][j] < res2 then res2 = m1[i][j] end end end local err = res1 - res2 mytester:assertlt(err, precision, 'error in torch.min - contiguous') -- non-contiguous local m1 = torch.randn(10,10,10) local m2 = m1[{{}, 4, {}}] local res1 = torch.min(m2) local res2 = m2[1][1] for i = 1,m2:size(1) do for j = 1,m2:size(2) do if m2[i][j] < res2 then res2 = m2[i][j] end end end local err = res1 - res2 mytester:assertlt(err, precision, 'error in torch.min - non-contiguous') -- torch.max([resval, resind,] x ,dim]) function lua_min(t, dim) assert(t:nDimension() == 2) max_val = t:narrow(dim, 1, 1):clone() max_ind = t:narrow(dim, 1, 1):clone():long():fill(1) other = 3 - dim for i = 1, t:size(other) do for j = 1, t:size(dim) do val = t:select(other, i):select(dim, j) max = max_val:select(other, i):select(dim, 1) if val < max then max_val:select(other, i):fill(val) max_ind:select(other, i):fill(j) end end end return max_val, max_ind end local m1 = torch.randn(100,100) for dim = 1,2 do local res1val, res1ind = torch.min(m1, dim) local res2val, res2ind = lua_min(m1, dim) mytester:asserteq((res1val-res2val):abs():max(), 0, 'error in torch.max') mytester:asserteq((res1ind-res2ind):abs():max(), 0, 'error in torch.max') end -- NaNs for index in pairs{1, 5, 100} do local m1 = torch.randn(100) m1[index] = 0/0 local res1val, res1ind = torch.min(m1, 1) mytester:assert(res1val[1] ~= res1val[1], 'error in torch.min (value) - NaNs') mytester:assert(res1ind[1] == index, 'error in torch.min (index) - NaNs') local res1val = torch.min(m1) mytester:assert(res1val ~= res1val, 'error in torch.min - NaNs') end -- TH_TENSOR_BASE local m1 = torch.Tensor(4):fill(3) local resval, resind = torch.min(m1, 1) mytester:assert(resind[1] == 1) end function torchtest.cmax() -- Two tensors. 
local a = torch.rand(msize, msize) local b = torch.rand(msize, msize) local c = torch.cmax(a, b) local expected_c = torch.zeros(msize, msize) expected_c:map2(a, b, function(_, a, b) return math.max(a, b) end) mytester:assertTensorEq(expected_c, c, 0, 'error in torch.cmax(tensor, tensor)') -- Tensor and scalar. local v = torch.uniform() c = torch.cmax(a, v) expected_c:map(a, function(_, a) return math.max(a, v) end) mytester:assertTensorEq(expected_c, c, 0, 'error in torch.cmax(tensor, scalar).') end function torchtest.cmin() -- Two tensors. local a = torch.rand(msize, msize) local b = torch.rand(msize, msize) local c = torch.cmin(a, b) local expected_c = torch.zeros(msize, msize) expected_c:map2(a, b, function(_, a, b) return math.min(a, b) end) mytester:assertTensorEq(expected_c, c, 0, 'error in torch.cmin(tensor, tensor)') -- Tensor and scalar. local v = torch.uniform() c = torch.cmin(a, v) expected_c:map(a, function(_, a) return math.min(a, v) end) mytester:assertTensorEq(expected_c, c, 0, 'error in torch.cmin(tensor, scalar).') end function torchtest.lerp() local function TH_lerp(a, b, weight) return a + weight * (b-a); end local a = torch.rand(msize, msize) local b = torch.rand(msize, msize) local w = math.random() local result = torch.lerp(a, b, w) local expected = a:new() expected:map2(a, b, function(_, a, b) return TH_lerp(a, b, w) end) mytester:assertTensorEq(expected, result, precision, 'error in torch.lerp(tensor, tensor, weight)') local a = (math.random()*2-1) * 100000 local b = (math.random()*2-1) * 100000 local w = math.random() local result = torch.lerp(a, b, w) local expected = TH_lerp(a, b, w) mytester:assertalmosteq(expected, result, precision, 'error in torch.lerp(scalar, scalar, weight)') end for i, v in ipairs{{10}, {5, 5}} do torchtest['allAndAny' .. 
i] = function () local x = torch.ones(unpack(v)):byte() mytester:assert(x:all(), 'error in all()') mytester:assert(x:any(), 'error in any()') x[3] = 0 mytester:assert(not x:all(), 'error in all()') mytester:assert(x:any(), 'error in any()') x:zero() mytester:assert(not x:all(), 'error in all()') mytester:assert(not x:any(), 'error in any()') x:fill(2) mytester:assert(x:all(), 'error in all()') mytester:assert(x:any(), 'error in any()') end end function torchtest.mv() local m1 = torch.randn(100,100) local v1 = torch.randn(100) local res1 = torch.mv(m1,v1) local res2 = res1:clone():zero() for i = 1,m1:size(1) do for j = 1,m1:size(2) do res2[i] = res2[i] + m1[i][j] * v1[j] end end local err = (res1-res2):abs():max() mytester:assertlt(err, precision, 'error in torch.mv') end function torchtest.fill() local types = { 'torch.ByteTensor', 'torch.CharTensor', 'torch.ShortTensor', 'torch.IntTensor', 'torch.FloatTensor', 'torch.DoubleTensor', 'torch.LongTensor', } for k,t in ipairs(types) do -- [res] torch.fill([res,] tensor, value) local m1 = torch.ones(100,100):type(t) local res1 = m1:clone() res1[{ 3,{} }]:fill(2) local res2 = m1:clone() for i = 1,m1:size(1) do res2[{ 3,i }] = 2 end local err = (res1-res2):double():abs():max() mytester:assertlt(err, precision, 'error in torch.fill - contiguous') local m1 = torch.ones(100,100):type(t) local res1 = m1:clone() res1[{ {},3 }]:fill(2) local res2 = m1:clone() for i = 1,m1:size(1) do res2[{ i,3 }] = 2 end local err = (res1-res2):double():abs():max() mytester:assertlt(err, precision, 'error in torch.fill - non contiguous') end end function torchtest.add() local types = { 'torch.ByteTensor', 'torch.CharTensor', 'torch.ShortTensor', 'torch.IntTensor', 'torch.FloatTensor', 'torch.DoubleTensor', 'torch.LongTensor', } for k,t in ipairs(types) do -- [res] torch.add([res,] tensor1, tensor2) local m1 = torch.randn(100,100):type(t) local v1 = torch.randn(100):type(t) local res1 = torch.add(m1[{ 4,{} }],v1) local res2 = res1:clone():zero() 
for i = 1,m1:size(2) do res2[i] = m1[4][i] + v1[i] end local err = (res1-res2):double():abs():max() mytester:assertlt(err, precision, 'error in torch.add - contiguous' .. ' ' .. t) local m1 = torch.randn(100,100):type(t) local v1 = torch.randn(100):type(t) local res1 = torch.add(m1[{ {},4 }],v1) local res2 = res1:clone():zero() for i = 1,m1:size(1) do res2[i] = m1[i][4] + v1[i] end local err = (res1-res2):double():abs():max() mytester:assertlt(err, precision, 'error in torch.add - non contiguous' .. ' ' .. t) -- [res] torch.add([res,] tensor, value) local m1 = torch.randn(10,10):type(t) local res1 = m1:clone() res1[{ 3,{} }]:add(2) local res2 = m1:clone() for i = 1,m1:size(1) do res2[{ 3,i }] = res2[{ 3,i }] + 2 end local err = (res1-res2):double():abs():max() mytester:assertlt(err, precision, 'error in torch.add - scalar, contiguous' .. ' ' .. t) local m1 = torch.randn(10,10) local res1 = m1:clone() res1[{ {},3 }]:add(2) local res2 = m1:clone() for i = 1,m1:size(1) do res2[{ i,3 }] = res2[{ i,3 }] + 2 end local err = (res1-res2):abs():max() mytester:assertlt(err, precision, 'error in torch.add - scalar, non contiguous' .. ' ' .. 
t) -- [res] torch.add([res,] tensor1, value, tensor2) end end function torchtest.csub() local rngState = torch.getRNGState() torch.manualSeed(123) local a = torch.randn(100,90) local b = a:clone():normal() local res_add = torch.add(a, -1, b) local res_csub = a:clone() res_csub:csub(b) mytester:assertlt((res_add - res_csub):abs():max(), 0.00001) local _ = torch.setRNGState(rngState) end function torchtest.csub_scalar() local rngState = torch.getRNGState() torch.manualSeed(123) local a = torch.randn(100,100) local scalar = 123.5 local res_add = torch.add(a, -scalar) local res_csub = a:clone() res_csub:csub(scalar) mytester:assertlt((res_add - res_csub):abs():max(), 0.00001) local _ = torch.setRNGState(rngState) end function torchtest.neg() local rngState = torch.getRNGState() torch.manualSeed(123) local a = torch.randn(100,90) local zeros = torch.Tensor():resizeAs(a):zero() local res_add = torch.add(zeros, -1, a) local res_neg = a:clone() res_neg:neg() mytester:assertlt((res_add - res_neg):abs():max(), 0.00001) local _ = torch.setRNGState(rngState) end function torchtest.cinv() local rngState = torch.getRNGState() torch.manualSeed(123) local a = torch.randn(100,89) local zeros = torch.Tensor():resizeAs(a):zero() local res_pow = torch.pow(a, -1) local res_inv = a:clone() res_inv:cinv() mytester:assertlt((res_pow - res_inv):abs():max(), 0.00001) local _ = torch.setRNGState(rngState) end function torchtest.mul() local types = { 'torch.ByteTensor', 'torch.CharTensor', 'torch.ShortTensor', 'torch.IntTensor', 'torch.FloatTensor', 'torch.DoubleTensor', 'torch.LongTensor', } for k,t in ipairs(types) do local m1 = torch.randn(10,10):type(t) local res1 = m1:clone() res1[{ {},3 }]:mul(2) local res2 = m1:clone() for i = 1,m1:size(1) do res2[{ i,3 }] = res2[{ i,3 }] * 2 end local err = (res1-res2):double():abs():max() mytester:assertlt(err, precision, 'error in torch.mul - scalar, non contiguous' .. ' ' .. 
t) end end function torchtest.div() local types = { 'torch.ByteTensor', 'torch.CharTensor', 'torch.ShortTensor', 'torch.IntTensor', 'torch.FloatTensor', 'torch.DoubleTensor', 'torch.LongTensor', } for k,t in ipairs(types) do local m1 = torch.Tensor(10,10):uniform(0,10):type(t) local res1 = m1:clone() res1[{ {},3 }]:div(2) local res2 = m1:clone() for i = 1,m1:size(1) do local ok = pcall(function() res2[{ i,3 }] = res2[{ i,3 }] / 2 end) if not ok then res2[{ i,3 }] = torch.floor(res2[{ i,3 }] / 2) end end local err = (res1-res2):double():abs():max() mytester:assertlt(err, precision, 'error in torch.div - scalar, non contiguous' .. ' ' .. t) end end function torchtest.lshift() local m1 = torch.LongTensor(10,10):random(0,100) local res1 = m1:clone() local q = 2 local f = math.pow(2, q) res1[{ {},3 }]:lshift(q) local res2 = m1:clone() for i = 1,m1:size(1) do res2[{ i,3 }] = res2[{ i,3 }] * f end local err = (res1-res2):abs():max() mytester:assertlt(err, precision, 'error in torch.lshift - scalar, non contiguous') local m1 = torch.LongTensor(10,10):random(0,100) local res1 = m1:clone() local q = 2 res1:lshift(q) local res2 = m1:clone() for i = 1,m1:size(1) do for j = 1,m1:size(1) do res2[{ i,j }] = res2[{ i,j }] * f end end local err = (res1-res2):abs():max() mytester:assertlt(err, precision, 'error in torch.lshift - scalar, contiguous') end function torchtest.rshift() local m1 = torch.LongTensor(10,10):random(0,100) local res1 = m1:clone() local q = 2 local f = math.pow(2, q) res1[{ {},3 }]:rshift(q) local res2 = m1:clone() for i = 1,m1:size(1) do res2[{ i,3 }] = math.floor(res2[{ i,3 }] / f) end local err = (res1-res2):abs():max() mytester:assertlt(err, precision, 'error in torch.rshift - scalar, non contiguous') local m1 = torch.LongTensor(10,10):random(0,100) local res1 = m1:clone() local q = 2 res1:rshift(q) local res2 = m1:clone() for i = 1,m1:size(1) do for j = 1,m1:size(1) do res2[{ i,j }] = math.floor(res2[{ i,j }] / f) end end local err = 
(res1-res2):abs():max() mytester:assertlt(err, precision, 'error in torch.rshift - scalar, contiguous') end function torchtest.fmod() local m1 = torch.Tensor(10,10):uniform(-10, 10) local res1 = m1:clone() local q = 2.1 res1[{ {},3 }]:fmod(q) local res2 = m1:clone() for i = 1,m1:size(1) do res2[{ i,3 }] = math.fmod(res2[{ i,3 }], q) end local err = (res1-res2):abs():max() mytester:assertlt(err, precision, 'error in torch.fmod - scalar, non contiguous') end function torchtest.remainder() local m1 = torch.Tensor(10, 10):uniform(-10, 10) local res1 = m1:clone() local q = 2.1 res1[{ {},3 }]:remainder(q) local res2 = m1:clone() for i = 1,m1:size(1) do res2[{ i,3 }] = res2[{ i,3 }] % q end local err = (res1-res2):abs():max() mytester:assertlt(err, precision, 'error in torch.remainder - scalar, non contiguous') end function torchtest.bitand() local m1 = torch.LongTensor(10,10):random(0,100) local res1 = m1:clone() local val = 32 -- This should be a power of 2 res1[{ {},3 }]:bitand(val - 1) local res2 = m1:clone() for i = 1,m1:size(1) do res2[{ i,3 }] = res2[{ i,3 }] % val end local err = (res1-res2):abs():max() mytester:assertlt(err, precision, 'error in torch.bitand - scalar, non contiguous') local m1 = torch.LongTensor(10,10):random(0,100) local res1 = m1:clone() res1:bitand(val - 1) local res2 = m1:clone() for i = 1,m1:size(1) do for j = 1,m1:size(1) do res2[{ i,j }] = res2[{ i,j }] % val end end local err = (res1-res2):abs():max() mytester:assertlt(err, precision, 'error in torch.bitand - scalar, contiguous') end function torchtest.bitor() local m1 = torch.LongTensor(10,10):random(0,10000) local res1 = m1:clone() local val = 32 -- This should be a power of 2 res1[{ {},3 }]:bitor(val-1) local res2 = m1:clone() for i = 1,m1:size(1) do res2[{ i,3 }] = math.floor(res2[{ i,3 }] / val) * val + (val - 1) end local err = (res1-res2):abs():max() mytester:assertlt(err, precision, 'error in torch.bitor - scalar, non contiguous') local m1 = torch.LongTensor(10,10):random(0,10000) 
local res1 = m1:clone() res1:bitor(val - 1) local res2 = m1:clone() for i = 1,m1:size(1) do for j = 1,m1:size(1) do res2[{ i,j }] = math.floor(res2[{ i,j }] / val) * val + (val - 1) end end local err = (res1-res2):abs():max() mytester:assertlt(err, precision, 'error in torch.bitor - scalar, contiguous') end function torchtest.cbitxor() local t1 = torch.LongTensor(10,10):random(0,10000) local t2 = torch.LongTensor(10,10):random(10001,20000) -- Perform xor swap and check results local t3 = torch.cbitxor(t1, t2) local r1 = torch.cbitxor(t3, t2) local r2 = torch.cbitxor(t3, t1) local err1 = (r1 - t1):abs():max() local err2 = (r2 - t2):abs():max() mytester:assertlt(err1 + err2, precision, 'error in torch.cbitxor contiguous') end function torchtest.mm() -- helper function local function matrixmultiply(mat1,mat2) local n = mat1:size(1) local m = mat1:size(2) local p = mat2:size(2) local res = torch.zeros(n,p) for i = 1, n do for j = 1, p do local sum = 0 for k = 1, m do sum = sum + mat1[i][k]*mat2[k][j] end res[i][j] = sum end end return res end -- contiguous case local n, m, p = 10, 10, 5 local mat1 = torch.randn(n,m) local mat2 = torch.randn(m,p) local res = torch.mm(mat1,mat2) local res2 = matrixmultiply(mat1,mat2) mytester:assertTensorEq(res,res2,precision,'error in torch.mm') -- non contiguous case 1 local n, m, p = 10, 10, 5 local mat1 = torch.randn(n,m) local mat2 = torch.randn(p,m):t() local res = torch.mm(mat1,mat2) local res2 = matrixmultiply(mat1,mat2) mytester:assertTensorEq(res,res2,precision,'error in torch.mm, non contiguous') -- non contiguous case 2 local n, m, p = 10, 10, 5 local mat1 = torch.randn(m,n):t() local mat2 = torch.randn(m,p) local res = torch.mm(mat1,mat2) local res2 = matrixmultiply(mat1,mat2) mytester:assertTensorEq(res,res2,precision,'error in torch.mm, non contiguous') -- non contiguous case 3 local n, m, p = 10, 10, 5 local mat1 = torch.randn(m,n):t() local mat2 = torch.randn(p,m):t() local res = torch.mm(mat1,mat2) local res2 = 
matrixmultiply(mat1,mat2) mytester:assertTensorEq(res,res2,precision,'error in torch.mm, non contiguous') -- test with zero stride local n, m, p = 10, 10, 5 local mat1 = torch.randn(n,m) local mat2 = torch.randn(m,1):expand(m,p) local res = torch.mm(mat1,mat2) local res2 = matrixmultiply(mat1,mat2) mytester:assertTensorEq(res,res2,precision,'error in torch.mm, non contiguous, zero stride') end function torchtest.bmm() local num_batches = 10 local M, N, O = 23, 8, 12 local b1 = torch.randn(num_batches, M, N) local b2 = torch.randn(num_batches, N, O) local res = torch.bmm(b1, b2) for i = 1, num_batches do local r = torch.mm(b1[i], b2[i]) mytester:assertTensorEq(r, res[i], precision, 'result matrix ' .. i .. ' wrong') end end function torchtest.addbmm() local num_batches = 10 local M, N, O = 12, 8, 5 local b1 = torch.randn(num_batches, M, N) local b2 = torch.randn(num_batches, N, O) local res = torch.bmm(b1, b2) local res2 = torch.Tensor():resizeAs(res[1]):zero() res2:addbmm(b1,b2) mytester:assertTensorEq(res2, res:sum(1)[1], precision, 'addbmm result wrong') res2:addbmm(1,b1,b2) mytester:assertTensorEq(res2, res:sum(1)[1]*2, precision, 'addbmm result wrong') res2:addbmm(1,res2,.5,b1,b2) mytester:assertTensorEq(res2, res:sum(1)[1]*2.5, precision, 'addbmm result wrong') local res3 = torch.addbmm(1,res2,0,b1,b2) mytester:assertTensorEq(res3, res2, precision, 'addbmm result wrong') local res4 = torch.addbmm(1,res2,.5,b1,b2) mytester:assertTensorEq(res4, res:sum(1)[1]*3, precision, 'addbmm result wrong') local res5 = torch.addbmm(0,res2,1,b1,b2) mytester:assertTensorEq(res5, res:sum(1)[1], precision, 'addbmm result wrong') local res6 = torch.addbmm(.1,res2,.5,b1,b2) mytester:assertTensorEq(res6, res2*.1 + res:sum(1)*.5, precision, 'addbmm result wrong') end function torchtest.baddbmm() local num_batches = 10 local M, N, O = 12, 8, 5 local b1 = torch.randn(num_batches, M, N) local b2 = torch.randn(num_batches, N, O) local res = torch.bmm(b1, b2) local res2 = 
torch.Tensor():resizeAs(res):zero() res2:baddbmm(b1,b2) mytester:assertTensorEq(res2, res, precision, 'baddbmm result wrong') res2:baddbmm(1,b1,b2) mytester:assertTensorEq(res2, res*2, precision, 'baddbmm result wrong') res2:baddbmm(1,res2,.5,b1,b2) mytester:assertTensorEq(res2, res*2.5, precision, 'baddbmm result wrong') local res3 = torch.baddbmm(1,res2,0,b1,b2) mytester:assertTensorEq(res3, res2, precision, 'baddbmm result wrong') local res4 = torch.baddbmm(1,res2,.5,b1,b2) mytester:assertTensorEq(res4, res*3, precision, 'baddbmm result wrong') local res5 = torch.baddbmm(0,res2,1,b1,b2) mytester:assertTensorEq(res5, res, precision, 'baddbmm result wrong') local res6 = torch.baddbmm(.1,res2,.5,b1,b2) mytester:assertTensorEq(res6, res2*.1 + res*.5, precision, 'baddbmm result wrong') end function torchtest.clamp() local m1 = torch.rand(100):mul(5):add(-2.5) -- uniform in [-2.5, 2.5] -- just in case we're extremely lucky: local min_val = -1 local max_val = 1 m1[1] = min_val m1[2] = max_val local res1 = m1:clone() res1:clamp(min_val, max_val) local res2 = m1:clone() for i = 1,m1:size(1) do if res2[i] > max_val then res2[i] = max_val elseif res2[i] < min_val then res2[i] = min_val end end local err = (res1-res2):abs():max() mytester:assertlt(err, precision, 'error in torch.clamp - scalar, non contiguous') end function torchtest.pow() -- [res] torch.pow([res,] x) -- base - tensor, exponent - number -- contiguous local m1 = torch.randn(100,100) local res1 = torch.pow(m1[{ 4,{} }], 3) local res2 = res1:clone():zero() for i = 1,res1:size(1) do res2[i] = math.pow(m1[4][i], 3) end local err = res1:clone():zero() -- find absolute error for i = 1, res1:size(1) do err[i] = math.abs(res1[i] - res2[i]) end -- find maximum element of error local maxerr = 0 for i = 1, err:size(1) do if err[i] > maxerr then maxerr = err[i] end end mytester:assertlt(maxerr, precision, 'error in torch.pow - contiguous') -- non-contiguous local m1 = torch.randn(100,100) local res1 = torch.pow(m1[{ {}, 
4 }], 3) local res2 = res1:clone():zero() for i = 1,res1:size(1) do res2[i] = math.pow(m1[i][4], 3) end local err = res1:clone():zero() -- find absolute error for i = 1, res1:size(1) do err[i] = math.abs(res1[i] - res2[i]) end -- find maximum element of error local maxerr = 0 for i = 1, err:size(1) do if err[i] > maxerr then maxerr = err[i] end end mytester:assertlt(maxerr, precision, 'error in torch.pow - non-contiguous') -- base - number, exponent - tensor -- contiguous local m1 = torch.randn(100,100) local res1 = torch.pow(3, m1[{ 4,{} }]) local res2 = res1:clone():zero() for i = 1,res1:size(1) do res2[i] = math.pow(3, m1[4][i]) end local err = res1:clone():zero() -- find absolute error for i = 1, res1:size(1) do err[i] = math.abs(res1[i] - res2[i]) end -- find maximum element of error local maxerr = 0 for i = 1, err:size(1) do if err[i] > maxerr then maxerr = err[i] end end mytester:assertlt(maxerr, precision, 'error in torch.pow - contiguous') -- non-contiguous local m1 = torch.randn(100,100) local res1 = torch.pow(3, m1[{ {}, 4 }]) local res2 = res1:clone():zero() for i = 1,res1:size(1) do res2[i] = math.pow(3, m1[i][4]) end local err = res1:clone():zero() -- find absolute error for i = 1, res1:size(1) do err[i] = math.abs(res1[i] - res2[i]) end -- find maximum element of error local maxerr = 0 for i = 1, err:size(1) do if err[i] > maxerr then maxerr = err[i] end end mytester:assertlt(maxerr, precision, 'error in torch.pow - non-contiguous') end function torchtest.cdiv() local types = { 'torch.ByteTensor', 'torch.CharTensor', 'torch.ShortTensor', 'torch.IntTensor', 'torch.FloatTensor', 'torch.DoubleTensor', 'torch.LongTensor', } for k,t in ipairs(types) do -- [res] torch.cdiv([res,] tensor1, tensor2) -- contiguous local m1 = torch.Tensor(10, 10, 10):uniform(0,10):type(t) local m2 = torch.Tensor(10, 10 * 10):uniform(0,10):type(t) m2[m2:eq(0)] = 2 local sm1 = m1[{4, {}, {}}] local sm2 = m2[{4, {}}] local res1 = torch.cdiv(sm1, sm2) local res2 = 
res1:clone():zero() for i = 1,sm1:size(1) do for j = 1, sm1:size(2) do local idx1d = (((i-1)*sm1:size(1)))+j local ok = pcall(function() res2[i][j] = sm1[i][j] / sm2[idx1d] end) if not ok then res2[i][j] = torch.floor(sm1[i][j] / sm2[idx1d]) end end end local err = res1:clone():zero() -- find absolute error for i = 1, res1:size(1) do for j = 1, res1:size(2) do err[i][j] = math.abs(res1[i][j] - res2[i][j]) end end -- find maximum element of error local maxerr = 0 for i = 1, err:size(1) do for j = 1, err:size(2) do if err[i][j] > maxerr then maxerr = err[i][j] end end end mytester:assertlt(maxerr, precision, 'error in torch.cdiv - contiguous' .. ' ' .. t) -- non-contiguous local m1 = torch.Tensor(10, 10, 10):uniform(0,10):type(t) local m2 = torch.Tensor(10 * 10, 10 * 10):uniform(0,10):type(t) m2[m2:eq(0)] = 2 local sm1 = m1[{{}, 4, {}}] local sm2 = m2[{{}, 4}] local res1 = torch.cdiv(sm1, sm2) local res2 = res1:clone():zero() for i = 1,sm1:size(1) do for j = 1, sm1:size(2) do local idx1d = (((i-1)*sm1:size(1)))+j local ok = pcall(function() res2[i][j] = sm1[i][j] / sm2[idx1d] end) if not ok then res2[i][j] = torch.floor(sm1[i][j] / sm2[idx1d]) end end end local err = res1:clone():zero() -- find absolute error for i = 1, res1:size(1) do for j = 1, res1:size(2) do err[i][j] = math.abs(res1[i][j] - res2[i][j]) end end -- find maximum element of error local maxerr = 0 for i = 1, err:size(1) do for j = 1, err:size(2) do if err[i][j] > maxerr then maxerr = err[i][j] end end end mytester:assertlt(maxerr, precision, 'error in torch.cdiv - non-contiguous' .. ' ' .. 
t) end end function torchtest.cfmod() -- contiguous local m1 = torch.Tensor(10, 10, 10):uniform(-10, 10) local m2 = torch.Tensor(10, 10 * 10):uniform(-3, 3) local sm1 = m1[{4, {}, {}}] local sm2 = m2[{4, {}}] local res1 = torch.cfmod(sm1, sm2) local res2 = res1:clone():zero() for i = 1,sm1:size(1) do for j = 1, sm1:size(2) do local idx1d = (((i-1)*sm1:size(1)))+j res2[i][j] = math.fmod(sm1[i][j], sm2[idx1d]) end end local err = res1:clone():zero() -- find absolute error for i = 1, res1:size(1) do for j = 1, res1:size(2) do err[i][j] = math.abs(res1[i][j] - res2[i][j]) end end -- find maximum element of error local maxerr = 0 for i = 1, err:size(1) do for j = 1, err:size(2) do if err[i][j] > maxerr then maxerr = err[i][j] end end end mytester:assertlt(maxerr, precision, 'error in torch.cfmod - contiguous') -- non-contiguous local m1 = torch.Tensor(10, 10, 10):uniform(-10, 10) local m2 = torch.Tensor(10 * 10, 10 * 10):uniform(-3, 3) local sm1 = m1[{{}, 4, {}}] local sm2 = m2[{{}, 4}] local res1 = torch.cfmod(sm1, sm2) local res2 = res1:clone():zero() for i = 1,sm1:size(1) do for j = 1, sm1:size(2) do local idx1d = (((i-1)*sm1:size(1)))+j res2[i][j] = math.fmod(sm1[i][j], sm2[idx1d]) end end local err = res1:clone():zero() -- find absolute error for i = 1, res1:size(1) do for j = 1, res1:size(2) do err[i][j] = math.abs(res1[i][j] - res2[i][j]) end end -- find maximum element of error local maxerr = 0 for i = 1, err:size(1) do for j = 1, err:size(2) do if err[i][j] > maxerr then maxerr = err[i][j] end end end mytester:assertlt(maxerr, precision, 'error in torch.cfmod - non-contiguous') end function torchtest.cremainder() -- contiguous local m1 = torch.Tensor(10, 10, 10):uniform(-10, 10) local m2 = torch.Tensor(10, 10 * 10):uniform(-3, 3) local sm1 = m1[{4, {}, {}}] local sm2 = m2[{4, {}}] local res1 = torch.cremainder(sm1, sm2) local res2 = res1:clone():zero() for i = 1,sm1:size(1) do for j = 1, sm1:size(2) do local idx1d = (((i-1)*sm1:size(1)))+j res2[i][j] = 
sm1[i][j] % sm2[idx1d] end end local err = res1:clone():zero() -- find absolute error for i = 1, res1:size(1) do for j = 1, res1:size(2) do err[i][j] = math.abs(res1[i][j] - res2[i][j]) end end -- find maximum element of error local maxerr = 0 for i = 1, err:size(1) do for j = 1, err:size(2) do if err[i][j] > maxerr then maxerr = err[i][j] end end end mytester:assertlt(maxerr, precision, 'error in torch.cremainder - contiguous') -- non-contiguous local m1 = torch.Tensor(10, 10, 10):uniform(-10, 10) local m2 = torch.Tensor(10 * 10, 10 * 10):uniform(-3, 3) local sm1 = m1[{{}, 4, {}}] local sm2 = m2[{{}, 4}] local res1 = torch.cremainder(sm1, sm2) local res2 = res1:clone():zero() for i = 1,sm1:size(1) do for j = 1, sm1:size(2) do local idx1d = (((i-1)*sm1:size(1)))+j res2[i][j] = sm1[i][j] % sm2[idx1d] end end local err = res1:clone():zero() -- find absolute error for i = 1, res1:size(1) do for j = 1, res1:size(2) do err[i][j] = math.abs(res1[i][j] - res2[i][j]) end end -- find maximum element of error local maxerr = 0 for i = 1, err:size(1) do for j = 1, err:size(2) do if err[i][j] > maxerr then maxerr = err[i][j] end end end mytester:assertlt(maxerr, precision, 'error in torch.cremainder - non-contiguous') end function torchtest.cmul() local types = { 'torch.ByteTensor', 'torch.CharTensor', 'torch.ShortTensor', 'torch.IntTensor', 'torch.FloatTensor', 'torch.DoubleTensor', 'torch.LongTensor', } for k,t in ipairs(types) do -- [res] torch.cmul([res,] tensor1, tensor2) -- contiguous local m1 = torch.randn(10, 10, 10):type(t) local m2 = torch.randn(10, 10 * 10):type(t) local sm1 = m1[{4, {}, {}}] local sm2 = m2[{4, {}}] local res1 = torch.cmul(sm1, sm2) local res2 = res1:clone():zero() for i = 1,sm1:size(1) do for j = 1, sm1:size(2) do local idx1d = (((i-1)*sm1:size(1)))+j res2[i][j] = sm1[i][j] * sm2[idx1d] end end local err = res1:clone():zero() -- find absolute error for i = 1, res1:size(1) do for j = 1, res1:size(2) do err[i][j] = math.abs(res1[i][j] - res2[i][j]) 
end end -- find maximum element of error local maxerr = 0 for i = 1, err:size(1) do for j = 1, err:size(2) do if err[i][j] > maxerr then maxerr = err[i][j] end end end mytester:assertlt(maxerr, precision, 'error in torch.cmul - contiguous' .. ' ' .. t) -- non-contiguous local m1 = torch.randn(10, 10, 10):type(t) local m2 = torch.randn(10 * 10, 10 * 10):type(t) local sm1 = m1[{{}, 4, {}}] local sm2 = m2[{{}, 4}] local res1 = torch.cmul(sm1, sm2) local res2 = res1:clone():zero() for i = 1,sm1:size(1) do for j = 1, sm1:size(2) do local idx1d = (((i-1)*sm1:size(1)))+j res2[i][j] = sm1[i][j] * sm2[idx1d] end end local err = res1:clone():zero() -- find absolute error for i = 1, res1:size(1) do for j = 1, res1:size(2) do err[i][j] = math.abs(res1[i][j] - res2[i][j]) end end -- find maximum element of error local maxerr = 0 for i = 1, err:size(1) do for j = 1, err:size(2) do if err[i][j] > maxerr then maxerr = err[i][j] end end end mytester:assertlt(maxerr, precision, 'error in torch.cmul - non-contiguous' .. ' ' .. 
t) end end function torchtest.cpow() -- [res] torch.cpow([res,] tensor1, tensor2) -- contiguous local m1 = torch.rand(10, 10, 10) local m2 = torch.rand(10, 10 * 10) local sm1 = m1[{4, {}, {}}] local sm2 = m2[{4, {}}] local res1 = torch.cpow(sm1, sm2) local res2 = res1:clone():zero() for i = 1,sm1:size(1) do for j = 1, sm1:size(2) do local idx1d = (((i-1)*sm1:size(1)))+j res2[i][j] = math.pow(sm1[i][j], sm2[idx1d]) end end local err = res1:clone():zero() -- find absolute error for i = 1, res1:size(1) do for j = 1, res1:size(2) do err[i][j] = math.abs(res1[i][j] - res2[i][j]) end end -- find maximum element of error local maxerr = 0 for i = 1, err:size(1) do for j = 1, err:size(2) do if err[i][j] > maxerr then maxerr = err[i][j] end end end mytester:assertlt(maxerr, precision, 'error in torch.cpow - contiguous') -- non-contiguous local m1 = torch.rand(10, 10, 10) local m2 = torch.rand(10 * 10, 10 * 10) local sm1 = m1[{{}, 4, {}}] local sm2 = m2[{{}, 4}] local res1 = torch.cpow(sm1, sm2) local res2 = res1:clone():zero() for i = 1,sm1:size(1) do for j = 1, sm1:size(2) do local idx1d = (((i-1)*sm1:size(1)))+j res2[i][j] = math.pow(sm1[i][j],sm2[idx1d]) end end local err = res1:clone():zero() -- find absolute error for i = 1, res1:size(1) do for j = 1, res1:size(2) do err[i][j] = math.abs(res1[i][j] - res2[i][j]) end end -- find maximum element of error local maxerr = 0 for i = 1, err:size(1) do for j = 1, err:size(2) do if err[i][j] > maxerr then maxerr = err[i][j] end end end mytester:assertlt(maxerr, precision, 'error in torch.cpow - non-contiguous') end function torchtest.sum() local x = torch.rand(msize,msize) local mx = torch.sum(x,2) local mxx = torch.Tensor() torch.sum(mxx,x,2) mytester:asserteq(maxdiff(mx,mxx),0,'torch.sum value') local y = torch.rand(5, 5, 5) for i=1,3 do local a = y:sum(i) local b = y:narrow(i, 1, 1):clone():zero() for j = 1, 5 do b:add(y:narrow(i, j, 1)) end mytester:asserteq(maxdiff(a, b), 0, 'torch.sum value') end end function 
torchtest.prod() local x = torch.rand(msize,msize) local mx = torch.prod(x,2) local mxx = torch.Tensor() torch.prod(mxx,x,2) mytester:asserteq(maxdiff(mx,mxx),0,'torch.prod value') local y = torch.rand(5, 5, 5) for i=1,3 do local a = y:prod(i) local b = y:narrow(i, 1, 1):clone():fill(1) for j = 1, 5 do b:cmul(y:narrow(i, j, 1)) end mytester:asserteq(maxdiff(a, b), 0, 'torch.sum value') end end function torchtest.cumsum() local x = torch.rand(msize,msize) local mx = torch.cumsum(x,2) local mxx = torch.Tensor() torch.cumsum(mxx,x,2) mytester:asserteq(maxdiff(mx,mxx),0,'torch.cumsum value') end function torchtest.cumprod() local x = torch.rand(msize,msize) local mx = torch.cumprod(x,2) local mxx = torch.Tensor() torch.cumprod(mxx,x,2) mytester:asserteq(maxdiff(mx,mxx),0,'torch.cumprod value') end function torchtest.cross() local x = torch.rand(msize,3,msize) local y = torch.rand(msize,3,msize) local mx = torch.cross(x,y) local mxx = torch.Tensor() torch.cross(mxx,x,y) mytester:asserteq(maxdiff(mx,mxx),0,'torch.cross value') end function torchtest.zeros() local mx = torch.zeros(msize,msize) local mxx = torch.Tensor() torch.zeros(mxx,msize,msize) mytester:asserteq(maxdiff(mx,mxx),0,'torch.zeros value') end function torchtest.histc() local x = torch.Tensor{ 2, 4, 2, 2, 5, 4 } local y = torch.histc(x, 5, 1, 5) -- nbins, min, max local z = torch.Tensor{ 0, 3, 0, 2, 1 } mytester:assertTensorEq(y,z,precision,'error in torch.histc') end function torchtest.bhistc() local x = torch.Tensor(3, 6) x[1] = torch.Tensor{ 2, 4, 2, 2, 5, 4 } x[2] = torch.Tensor{ 3, 5, 1, 5, 3, 5 } x[3] = torch.Tensor{ 3, 4, 2, 5, 5, 1 } local y = torch.bhistc(x, 5, 1, 5) -- nbins, min, max local z = torch.Tensor(3, 5) z[1] = torch.Tensor{ 0, 3, 0, 2, 1 } z[2] = torch.Tensor{ 1, 0, 2, 0, 3 } z[3] = torch.Tensor{ 1, 1, 1, 1, 2 } mytester:assertTensorEq(y,z,precision,'error in torch.bhistc in last dimension') end function torchtest.ones() local mx = torch.ones(msize,msize) local mxx = torch.Tensor() 
torch.ones(mxx,msize,msize) mytester:asserteq(maxdiff(mx,mxx),0,'torch.ones value') end function torchtest.diag() local x = torch.rand(msize,msize) local mx = torch.diag(x) local mxx = torch.Tensor() torch.diag(mxx,x) mytester:asserteq(maxdiff(mx,mxx),0,'torch.diag value') end function torchtest.eye() local mx = torch.eye(msize,msize) local mxx = torch.Tensor() torch.eye(mxx,msize,msize) mytester:asserteq(maxdiff(mx,mxx),0,'torch.eye value') end function torchtest.renorm() local m1 = torch.randn(10,5) local res1 = torch.Tensor() local m2 local function renorm(matrix, value, dim, max_norm) local m1 = matrix:transpose(dim, 1):contiguous() -- collapse non-dim dimensions: m2 = m1:reshape(m1:size(1), m1:nElement()/m1:size(1)) local norms = m2:norm(value,2) -- clip local new_norms = norms:clone() new_norms[torch.gt(norms, max_norm)] = max_norm new_norms:cdiv(norms:add(1e-7)) -- renormalize m1:cmul(new_norms:expandAs(m1)) return m1:transpose(dim, 1) end -- note that the axis fed to torch.renorm is different (2~=1) local maxnorm = m1:norm(2,1):mean() m2 = renorm(m1,2,2,maxnorm) m1:renorm(2,2,maxnorm) mytester:assertTensorEq(m1, m2, 0.00001, 'error in renorm') mytester:assertTensorEq(m1:norm(2,1), m2:norm(2,1), 0.00001, 'error in renorm') m1 = torch.randn(3,4,5) m2 = m1:transpose(2,3):contiguous():reshape(15,4) maxnorm = m2:norm(2,1):mean() m2 = renorm(m2,2,2,maxnorm) m1:renorm(2,2,maxnorm) local m3 = m1:transpose(2,3):contiguous():reshape(15,4) mytester:assertTensorEq(m3, m2, 0.00001, 'error in renorm') mytester:assertTensorEq(m3:norm(2,1), m2:norm(2,1), 0.00001, 'error in renorm') end function torchtest.multinomialwithreplacement() local n_row = 3 for n_col=4,5 do local t=os.time() torch.manualSeed(t) local prob_dist = torch.rand(n_row,n_col) prob_dist:select(2,n_col):fill(0) --index n_col shouldn't be sampled local n_sample = n_col local sample_indices = torch.multinomial(prob_dist, n_sample, true) mytester:assert(prob_dist:dim() == 2, "wrong number of prob_dist 
dimensions") mytester:assert(sample_indices:size(2) == n_sample, "wrong number of samples") for i=1,n_row do for j=1,n_sample do mytester:assert(sample_indices[{i,j}] ~= n_col, "sampled an index with zero probability") end end end end function torchtest.multinomialwithoutreplacement() local n_row = 3 for n_col=4,5 do local t=os.time() torch.manualSeed(t) local prob_dist = torch.rand(n_row,n_col) prob_dist:select(2,n_col):fill(0) --index n_col shouldn't be sampled local n_sample = 3 local sample_indices = torch.multinomial(prob_dist, n_sample, false) mytester:assert(prob_dist:dim() == 2, "wrong number of prob_dist dimensions") mytester:assert(sample_indices:size(2) == n_sample, "wrong number of samples") for i=1,n_row do local row_samples = {} for j=1,n_sample do local sample_idx = sample_indices[{i,j}] mytester:assert( sample_idx ~= n_col, "sampled an index with zero probability" ) mytester:assert( not row_samples[sample_idx], "sampled an index twice" ) row_samples[sample_idx] = true end end end end function torchtest.aliasMultinomial() for i =1,5 do local n_class = 5 local t=os.time() torch.manualSeed(t) local probs = torch.Tensor(n_class):uniform(0,1) probs:div(probs:sum()) local output = torch.LongTensor(1000, 10000) local n_samples = output:nElement() local prob_state = torch.multinomialAliasSetup(probs) mytester:assert(prob_state[1]:min() > 0, "Index ="..prob_state[1]:min().."alias indices has an index below or equal to 0") mytester:assert(prob_state[1]:max() <= n_class, prob_state[1]:max().." alias indices has an index exceeding num_class") local prob_state = torch.multinomialAliasSetup(probs, prob_state) mytester:assert(prob_state[1]:min() > 0, "Index ="..prob_state[1]:min().."alias indices has an index below or equal to 0(cold)") mytester:assert(prob_state[1]:max() <= n_class, prob_state[1]:max()..","..prob_state[1]:min().." 
alias indices has an index exceeding num_class(cold)") local output = torch.LongTensor(n_samples) output = torch.multinomialAlias(output, prob_state) mytester:assert(output:nElement() == n_samples, "wrong number of samples") mytester:assert(output:min() > 0, "sampled indices has an index below or equal to 0") mytester:assert(output:max() <= n_class, "indices has an index exceeding num_class") end end function torchtest.multinomialvector() local n_col = 4 local t=os.time() torch.manualSeed(t) local prob_dist = torch.rand(n_col) local n_sample = n_col local sample_indices = torch.multinomial(prob_dist, n_sample, true) local s_dim = sample_indices:dim() mytester:assert(s_dim == 1, "wrong number of returned dimensions: "..s_dim) mytester:assert(prob_dist:dim() == 1, "wrong number of prob_dist dimensions") mytester:assert(sample_indices:size(1) == n_sample, "wrong number of samples") end function torchtest.range() local mx = torch.range(0,1) local mxx = torch.Tensor() torch.range(mxx,0,1) mytester:asserteq(maxdiff(mx,mxx),0,'torch.range value') -- Check range for non-contiguous tensors. 
local x = torch.zeros(2, 3) local y = x:narrow(2, 2, 2) y:range(0, 3) mytester:assertTensorEq(x, torch.Tensor{{0, 0, 1}, {0, 2, 3}}, 1e-16, 'non-contiguous range failed') end function torchtest.rangenegative() local mx = torch.Tensor({1,0}) local mxx = torch.Tensor() torch.range(mxx,1,0,-1) mytester:asserteq(maxdiff(mx,mxx),0,'torch.range value for negative step') end function torchtest.rangeequalbounds() local mx = torch.Tensor({1}) local mxx = torch.Tensor() torch.range(mxx,1,1,-1) mytester:asserteq(maxdiff(mx,mxx),0,'torch.range value for equal bounds step') torch.range(mxx,1,1,1) mytester:asserteq(maxdiff(mx,mxx),0,'torch.range value for equal bounds step') end function torchtest.rangefloat() local mx = torch.FloatTensor():range(0.6, 0.9, 0.1) mytester:asserteq(mx:size(1), 4, 'wrong size for FloatTensor range') mx = torch.FloatTensor():range(1, 10, 0.3) mytester:asserteq(mx:size(1), 31, 'wrong size for FloatTensor range') end function torchtest.rangedouble() local mx = torch.DoubleTensor():range(0.6, 0.9, 0.1) mytester:asserteq(mx:size(1), 4, 'wrong size for DoubleTensor range') mx = torch.DoubleTensor():range(1, 10, 0.3) mytester:asserteq(mx:size(1), 31, 'wrong size for DoubleTensor range') end function torchtest.randperm() local t=os.time() torch.manualSeed(t) local mx = torch.randperm(msize) local mxx = torch.Tensor() torch.manualSeed(t) torch.randperm(mxx,msize) mytester:asserteq(maxdiff(mx,mxx),0,'torch.randperm value') end function torchtest.reshape() local x = torch.rand(10,13,23) local mx = torch.reshape(x,130,23) local mxx = torch.Tensor() torch.reshape(mxx,x,130,23) mytester:asserteq(maxdiff(mx,mxx),0,'torch.reshape value') end local function assertIsOrdered(order, x, mxx, ixx, task) local areOrdered if order == 'descending' then areOrdered = function(a, b) return a >= b end elseif order == 'ascending' then areOrdered = function(a, b) return a <= b end else error('unknown order "' .. order .. 
'", must be "ascending" or "descending"') end local decreasing = true for j = 1,msize do for k = 2,msize do decreasing = decreasing and areOrdered(mxx[j][k-1], mxx[j][k]) end end mytester:assert(decreasing, 'torch.sort (' .. order .. ') values unordered for ' .. task) local seen = torch.ByteTensor(msize) local indicesCorrect = true for k = 1,msize do seen:zero() for j = 1,msize do indicesCorrect = indicesCorrect and (x[k][ixx[k][j]] == mxx[k][j]) seen[ixx[k][j]] = 1 end indicesCorrect = indicesCorrect and (torch.sum(seen) == msize) end mytester:assert(indicesCorrect, 'torch.sort (' .. order .. ') indices wrong for ' .. task) end function torchtest.sortAscending() local x = torch.rand(msize,msize) local mx,ix = torch.sort(x) -- Test use of result tensor local mxx = torch.Tensor() local ixx = torch.LongTensor() torch.sort(mxx,ixx,x) mytester:asserteq(maxdiff(mx,mxx),0,'torch.sort (ascending) value') mytester:asserteq(maxdiff(ix,ixx),0,'torch.sort (ascending) index') -- Test sorting of random numbers assertIsOrdered('ascending', x, mxx, ixx, 'random') mytester:assertTensorEq( torch.sort(torch.Tensor{ 50, 40, 30, 20, 10 }), torch.Tensor{ 10, 20, 30, 40, 50 }, 1e-16, "torch.sort (ascending) simple sort" ) -- Test that we still have proper sorting with duplicate keys local x = torch.floor(torch.rand(msize,msize)*10) torch.sort(mxx,ixx,x) assertIsOrdered('ascending', x, mxx, ixx, 'random with duplicate keys') end function torchtest.sortDescending() local x = torch.rand(msize,msize) local mx,ix = torch.sort(x,true) -- Test use of result tensor local mxx = torch.Tensor() local ixx = torch.LongTensor() torch.sort(mxx,ixx,x,true) mytester:asserteq(maxdiff(mx,mxx),0,'torch.sort (descending) value') mytester:asserteq(maxdiff(ix,ixx),0,'torch.sort (descending) index') -- Test sorting of random numbers assertIsOrdered('descending', x, mxx, ixx, 'random') -- Test simple sort task mytester:assertTensorEq( torch.sort(torch.Tensor{ 10, 20, 30, 40, 50 },true), torch.Tensor{ 50, 40, 
30, 20, 10 }, 1e-16, "torch.sort (descending) simple sort" ) -- Test that we still have proper sorting with duplicate keys assertIsOrdered('descending', x, mxx, ixx, 'random with duplicate keys') end function torchtest.topK() local function topKViaSort(t, k, dim, dir) local sorted, indices = t:sort(dim, dir) return sorted:narrow(dim, 1, k), indices:narrow(dim, 1, k) end local function compareTensors(t, res1, ind1, res2, ind2, dim, msg) -- Values should be exactly equivalent mytester:assertTensorEq(res1, res2, 0, msg) -- Indices might differ based on the implementation, since there is -- no guarantee of the relative order of selection if ind1:eq(ind2):min() == 0 then -- To verify that the indices represent equivalent elements, -- gather from the input using the topk indices and compare against -- the sort indices local vals = t:gather(dim, ind2) mytester:assertTensorEq(res1, vals, 0, msg) end end local function compare(t, k, dim, dir, msg) local topKVal, topKInd = t:topk(k, dim, dir, true) local sortKVal, sortKInd = topKViaSort(t, k, dim, dir) compareTensors(t, sortKVal, sortKInd, topKVal, topKInd, dim, msg) end local t = torch.rand(math.random(1, msize), math.random(1, msize), math.random(1, msize)) for kTries = 1, 3 do for dimTries = 1, 3 do for _, transpose in ipairs({true, false}) do for _, dir in ipairs({true, false}) do local testTensor = t local transposeMsg = nil if transpose then local dim1 = math.random(1, t:nDimension()) local dim2 = dim1 while dim1 == dim2 do dim2 = math.random(1, t:nDimension()) end testTensor = t:transpose(dim1, dim2) transposeMsg = 'transpose(' .. dim1 .. ', ' .. dim2 .. ')' end local dim = math.random(1, testTensor:nDimension()) local k = math.random(1, testTensor:size(dim)) local msg = 'topk(' .. k .. ', ' .. dim .. ', ' .. tostring(dir) .. ', true)' if transposeMsg then msg = msg .. ' ' .. 
transposeMsg end compare(testTensor, k, dim, dir, msg) end end end end end function torchtest.kthvalue() local x = torch.rand(msize, msize, msize) local x0 = x:clone() do local k = math.random(1, msize) local mx, ix = torch.kthvalue(x, k) local mxx, ixx = torch.sort(x) mytester:assertTensorEq(mxx:select(3, k), mx:select(3, 1), 0, 'torch.kthvalue value') mytester:assertTensorEq(ixx:select(3, k), ix:select(3, 1), 0, 'torch.kthvalue index') end do -- test use of result tensors local k = math.random(1, msize) local mx = torch.Tensor() local ix = torch.LongTensor() torch.kthvalue(mx, ix, x, k) local mxx, ixx = torch.sort(x) mytester:assertTensorEq(mxx:select(3, k), mx:select(3, 1), 0, 'torch.kthvalue value') mytester:assertTensorEq(ixx:select(3, k), ix:select(3, 1), 0, 'torch.kthvalue index') end do -- test non-default dim local k = math.random(1, msize) local mx, ix = torch.kthvalue(x, k, 1) local mxx, ixx = torch.sort(x, 1) mytester:assertTensorEq(mxx:select(1, k), mx[1], 0, 'torch.kthvalue value') mytester:assertTensorEq(ixx:select(1, k), ix[1], 0, 'torch.kthvalue index') end do -- non-contiguous local y = x:narrow(2, 1, 1) local y0 = y:clone() local k = math.random(1, msize) local my, ix = torch.kthvalue(y, k) local my0, ix0 = torch.kthvalue(y0, k) mytester:assertTensorEq(my, my0, 0, 'torch.kthvalue value') mytester:assertTensorEq(ix, ix0, 0, 'torch.kthvalue index') end mytester:assertTensorEq(x, x0, 0, 'torch.kthvalue modified input') -- simple test case (with repetitions) local y = torch.Tensor{3,5,4,1,1,5} mytester:assertTensorEq(torch.kthvalue(y, 3), torch.Tensor{3}, 1e-16, 'torch.kthvalue simple') mytester:assertTensorEq(torch.kthvalue(y, 2), torch.Tensor{1}, 1e-16, 'torch.kthvalue simple') end function torchtest.median() for _, msize in ipairs{155,156} do local x = torch.rand(msize, msize) local x0 = x:clone() local mx, ix = torch.median(x) local mxx, ixx = torch.sort(x) local ind = math.floor((msize+1)/2) mytester:assertTensorEq(mxx:select(2, ind), 
mx:select(2, 1), 0, 'torch.median value') mytester:assertTensorEq(ixx:select(2, ind), ix:select(2, 1), 0, 'torch.median index') -- Test use of result tensor local mr = torch.Tensor() local ir = torch.LongTensor() torch.median(mr, ir, x) mytester:assertTensorEq(mr, mx, 0, 'torch.median result tensor value') mytester:assertTensorEq(ir, ix, 0, 'torch.median result tensor index') -- Test non-default dim mx, ix = torch.median(x, 1) mxx, ixx = torch.sort(x, 1) mytester:assertTensorEq(mxx:select(1, ind), mx[1], 0, 'torch.median value') mytester:assertTensorEq(ixx:select(1, ind), ix[1], 0, 'torch.median index') -- input unchanged mytester:assertTensorEq(x, x0, 0, 'torch.median modified input') end end function torchtest.mode() local x = torch.range(1, msize * msize):reshape(msize, msize) x:select(1, 1):fill(1) x:select(1, 2):fill(1) x:select(2, 1):fill(1) x:select(2, 2):fill(1) local x0 = x:clone() -- Pre-calculated results. local res = torch.Tensor(msize):fill(1) -- The indices are the position of the last appearance of the mode element. 
local resix = torch.LongTensor(msize):fill(2) resix[1] = msize resix[2] = msize local mx, ix = torch.mode(x) mytester:assertTensorEq(res:view(msize, 1), mx, 0, 'torch.mode value') mytester:assertTensorEq(resix:view(msize, 1), ix, 0, 'torch.mode index') -- Test use of result tensor local mr = torch.Tensor() local ir = torch.LongTensor() torch.mode(mr, ir, x) mytester:assertTensorEq(mr, mx, 0, 'torch.mode result tensor value') mytester:assertTensorEq(ir, ix, 0, 'torch.mode result tensor index') -- Test non-default dim mx, ix = torch.mode(x, 1) mytester:assertTensorEq(res:view(1, msize), mx, 0, 'torch.mode value') mytester:assertTensorEq(resix:view(1, msize), ix, 0, 'torch.mode index') local input = torch.Tensor({ {1, 2, 2, 2, 3, 2}, {1.5, 2, 2, 1.5, 1.5, 5}, }) local value, index = torch.mode(input) local expected_value = torch.Tensor({{2}, {1.5}}) mytester:assertTensorEq(value, expected_value) -- input unchanged mytester:assertTensorEq(x, x0, 0, 'torch.mode modified input') end function torchtest.tril() local x = torch.rand(msize,msize) local mx = torch.tril(x) local mxx = torch.Tensor() torch.tril(mxx,x) mytester:asserteq(maxdiff(mx,mxx),0,'torch.tril value') end function torchtest.triu() local x = torch.rand(msize,msize) local mx = torch.triu(x) local mxx = torch.Tensor() torch.triu(mxx,x) mytester:asserteq(maxdiff(mx,mxx),0,'torch.tril value') end function torchtest.cat() for dim = 1, 3 do local x = torch.rand(13, msize, msize):transpose(1, dim) local y = torch.rand(17, msize, msize):transpose(1, dim) local mx = torch.cat(x, y, dim) mytester:assertTensorEq(mx:narrow(dim, 1, 13), x, 0, 'torch.cat value') mytester:assertTensorEq(mx:narrow(dim, 14, 17), y, 0, 'torch.cat value') local mxx = torch.Tensor() torch.cat(mxx, x, y, dim) mytester:assertTensorEq(mx, mxx, 0, 'torch.cat value') local x = torch.rand(1,2,3) local y = torch.Tensor() local mx = torch.cat(x,y,dim) mytester:asserteq(mx:size(1),1,'torch.cat size') mytester:asserteq(mx:size(2),2,'torch.cat size') 
mytester:asserteq(mx:size(3),3,'torch.cat size') mytester:assertTensorEq(mx, x, 0, 'torch.cat value') local x = torch.Tensor() local y = torch.Tensor() local mx = torch.cat(x,y,dim) mytester:asserteq(mx:dim(),0,'torch.cat dim') end local x = torch.Tensor() local y = torch.rand(1,2,3) local mx = torch.cat(x,y) mytester:asserteq(mx:size(1),1,'torch.cat size') mytester:asserteq(mx:size(2),2,'torch.cat size') mytester:asserteq(mx:size(3),3,'torch.cat size') mytester:assertTensorEq(mx, y, 0, 'torch.cat value') local x = torch.Tensor() local y = torch.Tensor() local mx = torch.cat(x,y) mytester:asserteq(mx:dim(),0,'torch.cat dim') end function torchtest.catArray() for dim = 1, 3 do local x = torch.rand(13, msize, msize):transpose(1, dim) local y = torch.rand(17, msize, msize):transpose(1, dim) local z = torch.rand(19, msize, msize):transpose(1, dim) local mx = torch.cat({x, y, z}, dim) mytester:assertTensorEq(mx:narrow(dim, 1, 13), x, 0, 'torch.cat value') mytester:assertTensorEq(mx:narrow(dim, 14, 17), y, 0, 'torch.cat value') mytester:assertTensorEq(mx:narrow(dim, 31, 19), z, 0, 'torch.cat value') mytester:assertError(function() torch.cat{} end, 'torch.cat empty table') local mxx = torch.Tensor() torch.cat(mxx, {x, y, z}, dim) mytester:assertTensorEq(mx, mxx, 0, 'torch.cat value') torch.cat(mxx:float(), {x:float(), y:float(), z:float()}, dim) mytester:assertTensorEq(mx, mxx, 0, 'torch.cat value') torch.cat(mxx:double(), {x:double(), y:double(), z:double()}, dim) mytester:assertTensorEq(mx, mxx, 0, 'torch.cat value') local x = torch.rand(1,2,3) local y = torch.Tensor() local mx = torch.cat({x,y},dim) mytester:asserteq(mx:size(1),1,'torch.cat size') mytester:asserteq(mx:size(2),2,'torch.cat size') mytester:asserteq(mx:size(3),3,'torch.cat size') mytester:assertTensorEq(mx, x, 0, 'torch.cat value') local x = torch.Tensor() local y = torch.Tensor() local mx = torch.cat({x,y},dim) mytester:asserteq(mx:dim(),0,'torch.cat dim') end local x = torch.Tensor() local y = 
torch.rand(1,2,3) local mx = torch.cat({x,y}) mytester:asserteq(mx:size(1),1,'torch.cat size') mytester:asserteq(mx:size(2),2,'torch.cat size') mytester:asserteq(mx:size(3),3,'torch.cat size') mytester:assertTensorEq(mx, y, 0, 'torch.cat value') local x = torch.Tensor() local y = torch.Tensor() local mx = torch.cat({x,y}) mytester:asserteq(mx:dim(),0,'torch.cat dim') end function torchtest.catNoDim() local a local b local c a = torch.Tensor(msize):uniform() b = torch.Tensor(msize):uniform() c = torch.cat(a, b) mytester:assertTensorEq(c:narrow(1, 1, msize), a, 0, 'torch.cat value') mytester:assertTensorEq(c:narrow(1, msize + 1, msize), b, 0, 'torch.cat value') a = torch.Tensor(1, msize):uniform() b = torch.Tensor(1, msize):uniform() c = torch.cat(a, b) mytester:assertTensorEq(c:narrow(2, 1, msize), a, 0, 'torch.cat value') mytester:assertTensorEq(c:narrow(2, msize + 1, msize), b, 0, 'torch.cat value') a = torch.Tensor(10, msize):uniform() b = torch.Tensor(10, msize):uniform() c = torch.cat(a, b) mytester:assertTensorEq(c:narrow(2, 1, msize), a, 0, 'torch.cat value') mytester:assertTensorEq(c:narrow(2, msize + 1, msize), b, 0, 'torch.cat value') end function torchtest.sin_2() local x = torch.rand(msize,msize,msize) local mx = torch.sin(x) local mxx = torch.Tensor() torch.sin(mxx,x) mytester:asserteq(maxdiff(mx,mxx),0,'torch.sin value') end function torchtest.linspace() local from = math.random() local to = from+math.random() local mx = torch.linspace(from,to,137) local mxx = torch.Tensor() torch.linspace(mxx,from,to,137) mytester:asserteq(maxdiff(mx,mxx),0,'torch.linspace value') mytester:assertError(function() torch.linspace(0,1,1) end, 'accepted 1 point between 2 distinct endpoints') mytester:assertTensorEq(torch.linspace(0,0,1),torch.zeros(1),1e-16, 'failed to generate for torch.linspace(0,0,1)') -- Check linspace for generating with start > end. 
mytester:assertTensorEq(torch.linspace(2,0,3), torch.Tensor{2,1,0}, 1e-16, 'failed to generate for torch.linspace(2,0,3)') -- Check linspace for non-contiguous tensors. local x = torch.zeros(2, 3) local y = x:narrow(2, 2, 2) y:linspace(0, 3, 4) mytester:assertTensorEq(x, torch.Tensor{{0, 0, 1}, {0, 2, 3}}, 1e-16, 'non-contiguous linspace failed') end function torchtest.logspace() local from = math.random() local to = from+math.random() local mx = torch.logspace(from,to,137) local mxx = torch.Tensor() torch.logspace(mxx,from,to,137) mytester:asserteq(maxdiff(mx,mxx),0,'torch.logspace value') mytester:assertError(function() torch.logspace(0,1,1) end, 'accepted 1 point between 2 distinct endpoints') mytester:assertTensorEq(torch.logspace(0,0,1),torch.ones(1),1e-16, 'failed to generate for torch.linspace(0,0,1)') -- Check logspace for generating with start > end. mytester:assertTensorEq(torch.logspace(1,0,2), torch.Tensor{10, 1}, 1e-16, 'failed to generate for torch.logspace(1,0,2)') -- Check logspace for non-contiguous tensors. 
local x = torch.zeros(2, 3) local y = x:narrow(2, 2, 2) y:logspace(0, 3, 4) mytester:assertTensorEq(x, torch.Tensor{{0, 1, 10}, {0, 100, 1000}}, 1e-16, 'non-contiguous logspace failed') end function torchtest.rand() torch.manualSeed(123456) local mx = torch.rand(msize,msize) local mxx = torch.Tensor() torch.manualSeed(123456) torch.rand(mxx,msize,msize) mytester:asserteq(maxdiff(mx,mxx),0,'torch.rand value') end function torchtest.randn() torch.manualSeed(123456) local mx = torch.randn(msize,msize) local mxx = torch.Tensor() torch.manualSeed(123456) torch.randn(mxx,msize,msize) mytester:asserteq(maxdiff(mx,mxx),0,'torch.randn value') end function torchtest.gesv() if not torch.gesv then return end local a=torch.Tensor({{6.80, -2.11, 5.66, 5.97, 8.23}, {-6.05, -3.30, 5.36, -4.44, 1.08}, {-0.45, 2.58, -2.70, 0.27, 9.04}, {8.32, 2.71, 4.35, -7.17, 2.14}, {-9.67, -5.14, -7.26, 6.08, -6.87}}):t() local b=torch.Tensor({{4.02, 6.19, -8.22, -7.57, -3.03}, {-1.56, 4.00, -8.67, 1.75, 2.86}, {9.81, -4.09, -4.57, -8.61, 8.99}}):t() local mx = torch.gesv(b,a) mytester:assertlt(b:dist(a*mx),1e-12,'torch.gesv') local ta = torch.Tensor() local tb = torch.Tensor() local mxx = torch.gesv(tb,ta,b,a) local mxxx = torch.gesv(b,a,b,a) mytester:asserteq(maxdiff(mx,tb),0,'torch.gesv value temp') mytester:asserteq(maxdiff(mx,b),0,'torch.gesv value flag') mytester:asserteq(maxdiff(mx,mxx),0,'torch.gesv value out1') mytester:asserteq(maxdiff(mx,mxxx),0,'torch.gesv value out2') end function torchtest.gesv_reuse() if not torch.gesv then return end local a=torch.Tensor({{6.80, -2.11, 5.66, 5.97, 8.23}, {-6.05, -3.30, 5.36, -4.44, 1.08}, {-0.45, 2.58, -2.70, 0.27, 9.04}, {8.32, 2.71, 4.35, -7.17, 2.14}, {-9.67, -5.14, -7.26, 6.08, -6.87}}):t() local b=torch.Tensor({{4.02, 6.19, -8.22, -7.57, -3.03}, {-1.56, 4.00, -8.67, 1.75, 2.86}, {9.81, -4.09, -4.57, -8.61, 8.99}}):t() local mx = torch.gesv(b,a) local ta = torch.Tensor() local tb = torch.Tensor() torch.gesv(tb,ta,b,a) 
mytester:asserteq(maxdiff(mx,tb),0,'torch.gesv value temp') torch.gesv(tb,ta,b,a) mytester:asserteq(maxdiff(mx,tb),0,'torch.gesv value reuse') end function torchtest.trtrs() if not torch.trtrs then return end local a=torch.Tensor({{6.80, -2.11, 5.66, 5.97, 8.23}, {-6.05, -3.30, 5.36, -4.44, 1.08}, {-0.45, 2.58, -2.70, 0.27, 9.04}, {8.32, 2.71, 4.35, -7.17, 2.14}, {-9.67, -5.14, -7.26, 6.08, -6.87}}):t() local b=torch.Tensor({{4.02, 6.19, -8.22, -7.57, -3.03}, {-1.56, 4.00, -8.67, 1.75, 2.86}, {9.81, -4.09, -4.57, -8.61, 8.99}}):t() local U = torch.triu(a) local L = torch.tril(a) -- solve Ux = b local x = torch.trtrs(b, U) mytester:assertlt(b:dist(U*x),1e-12,'torch.trtrs') x = torch.trtrs(b, U, 'U', 'N', 'N') mytester:assertlt(b:dist(U*x),1e-12,'torch.trtrs') -- solve Lx = b x = torch.trtrs(b, L, 'L') mytester:assertlt(b:dist(L*x),1e-12,'torch.trtrs') x = torch.trtrs(b, L, 'L', 'N', 'N') mytester:assertlt(b:dist(L*x),1e-12,'torch.trtrs') -- solve U'x = b x = torch.trtrs(b, U, 'U', 'T') mytester:assertlt(b:dist(U:t()*x),1e-12,'torch.trtrs') x = torch.trtrs(b, U, 'U', 'T', 'N') mytester:assertlt(b:dist(U:t()*x),1e-12,'torch.trtrs') -- solve U'x = b by manual transposition y = torch.trtrs(b, U:t(), 'L', 'N') mytester:assertlt(x:dist(y),1e-12,'torch.trtrs') -- solve L'x = b x = torch.trtrs(b, L, 'L', 'T') mytester:assertlt(b:dist(L:t()*x),1e-12,'torch.trtrs') x = torch.trtrs(b, L, 'L', 'T', 'N') mytester:assertlt(b:dist(L:t()*x),1e-12,'torch.trtrs') -- solve L'x = b by manual transposition y = torch.trtrs(b, L:t(), 'U', 'N') mytester:assertlt(x:dist(y),1e-12,'torch.trtrs') end function torchtest.trtrs_reuse() if not torch.trtrs then return end local a=torch.Tensor({{6.80, -2.11, 5.66, 5.97, 8.23}, {-6.05, -3.30, 5.36, -4.44, 1.08}, {-0.45, 2.58, -2.70, 0.27, 9.04}, {8.32, 2.71, 4.35, -7.17, 2.14}, {-9.67, -5.14, -7.26, 6.08, -6.87}}):t() local b=torch.Tensor({{4.02, 6.19, -8.22, -7.57, -3.03}, {-1.56, 4.00, -8.67, 1.75, 2.86}, {9.81, -4.09, -4.57, -8.61, 8.99}}):t() 
local mx = torch.trtrs(b,a) local ta = torch.Tensor() local tb = torch.Tensor() torch.trtrs(tb,ta,b,a) mytester:asserteq(maxdiff(mx,tb),0,'torch.trtrs value temp') tb:zero() torch.trtrs(tb,ta,b,a) mytester:asserteq(maxdiff(mx,tb),0,'torch.trtrs value reuse') end function torchtest.gels_uniquely_determined() if not torch.gels then return end local expectedNorm = 0 local a=torch.Tensor({{ 1.44, -9.96, -7.55, 8.34}, {-7.84, -0.28, 3.24, 8.09}, {-4.39, -3.24, 6.27, 5.28}, {4.53, 3.83, -6.64, 2.06}}):t() local b=torch.Tensor({{8.58, 8.26, 8.48, -5.28}, {9.35, -4.43, -0.70, -0.26}}):t() local a_copy = a:clone() local b_copy = b:clone() local mx = torch.gels(b,a) mytester:asserteq(maxdiff(a,a_copy),0,'torch.gels changed a') mytester:asserteq(maxdiff(b,b_copy),0,'torch.gels changed b') mytester:assertalmosteq((torch.mm(a,mx)-b):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer') local ta = torch.Tensor() local tb = torch.Tensor() local mxx = torch.gels(tb,ta,b,a) mytester:asserteq(maxdiff(a,a_copy),0,'torch.gels changed a') mytester:asserteq(maxdiff(b,b_copy),0,'torch.gels changed b') mytester:assertalmosteq((torch.mm(a,tb)-b):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer') local mxxx = torch.gels(b,a,b,a) mytester:assertalmosteq((torch.mm(a_copy,b)-b_copy):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer') mytester:asserteq(maxdiff(mx,tb),0,'torch.gels value temp') mytester:asserteq(maxdiff(mx,b),0,'torch.gels value flag') mytester:asserteq(maxdiff(mx,mxx),0,'torch.gels value out1') mytester:asserteq(maxdiff(mx,mxxx),0,'torch.gels value out2') end function torchtest.gels_reuse() if not torch.gels then return end local expectedNorm = 0 local a=torch.Tensor({{ 1.44, -9.96, -7.55, 8.34}, {-7.84, -0.28, 3.24, 8.09}, {-4.39, -3.24, 6.27, 5.28}, {4.53, 3.83, -6.64, 2.06}}):t() local b=torch.Tensor({{8.58, 8.26, 8.48, -5.28}, {9.35, -4.43, -0.70, -0.26}}):t() local ta = torch.Tensor() local tb = torch.Tensor() torch.gels(tb,ta,b,a) 
mytester:assertalmosteq((torch.mm(a,tb)-b):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer') torch.gels(tb,ta,b,a) mytester:assertalmosteq((torch.mm(a,tb)-b):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer') torch.gels(tb,ta,b,a) mytester:assertalmosteq((torch.mm(a,tb)-b):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer') end function torchtest.gels_overdetermined() if not torch.gels then return end local expectedNorm = 17.390200628863 local a=torch.Tensor({{ 1.44, -9.96, -7.55, 8.34, 7.08, -5.45}, {-7.84, -0.28, 3.24, 8.09, 2.52, -5.70}, {-4.39, -3.24, 6.27, 5.28, 0.74, -1.19}, {4.53, 3.83, -6.64, 2.06, -2.47, 4.70}}):t() local b=torch.Tensor({{8.58, 8.26, 8.48, -5.28, 5.72, 8.93}, {9.35, -4.43, -0.70, -0.26, -7.36, -2.52}}):t() local a_copy = a:clone() local b_copy = b:clone() local mx = torch.gels(b,a) mytester:asserteq(maxdiff(a,a_copy),0,'torch.gels changed a') mytester:asserteq(maxdiff(b,b_copy),0,'torch.gels changed b') mytester:assertalmosteq((torch.mm(a, mx)-b):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer') local ta = torch.Tensor() local tb = torch.Tensor() local mxx = torch.gels(tb,ta,b,a) mytester:asserteq(maxdiff(a,a_copy),0,'torch.gels changed a') mytester:asserteq(maxdiff(b,b_copy),0,'torch.gels changed b') mytester:assertalmosteq((torch.mm(a,tb)-b):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer') local mxxx = torch.gels(b,a,b,a) mytester:assertalmosteq((torch.mm(a_copy,b)-b_copy):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer') mytester:asserteq(maxdiff(mx,tb),0,'torch.gels value temp') mytester:asserteq(maxdiff(mx,b),0,'torch.gels value flag') mytester:asserteq(maxdiff(mx,mxx),0,'torch.gels value out1') mytester:asserteq(maxdiff(mx,mxxx),0,'torch.gels value out2') end function torchtest.gels_underdetermined() if not torch.gels then return end local expectedNorm = 0 local a=torch.Tensor({{ 1.44, -9.96, -7.55}, {-7.84, -0.28, 3.24}, {-4.39, -3.24, 6.27}, {4.53, 3.83, -6.64}}):t() local b=torch.Tensor({{8.58, 8.26, 
8.48}, {9.35, -4.43, -0.70}}):t() local a_copy = a:clone() local b_copy = b:clone() local mx = torch.gels(b,a) mytester:asserteq(maxdiff(a,a_copy),0,'torch.gels changed a') mytester:asserteq(maxdiff(b,b_copy),0,'torch.gels changed b') mytester:assertalmosteq((torch.mm(a,mx)-b):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer') local ta = torch.Tensor() local tb = torch.Tensor() local mxx = torch.gels(tb,ta,b,a) mytester:asserteq(maxdiff(a,a_copy),0,'torch.gels changed a') mytester:asserteq(maxdiff(b,b_copy),0,'torch.gels changed b') mytester:assertalmosteq((torch.mm(a,tb)-b):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer') local mxxx = torch.gels(b,a,b,a) mytester:assertalmosteq((torch.mm(a_copy,b)-b_copy):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer') mytester:asserteq(maxdiff(mx,tb),0,'torch.gels value temp') mytester:asserteq(maxdiff(mx,b),0,'torch.gels value flag') mytester:asserteq(maxdiff(mx,mxx),0,'torch.gels value out1') mytester:asserteq(maxdiff(mx,mxxx),0,'torch.gels value out2') end function torchtest.eig() if not torch.eig then return end local a=torch.Tensor({{ 1.96, 0.00, 0.00, 0.00, 0.00}, {-6.49, 3.80, 0.00, 0.00, 0.00}, {-0.47, -6.39, 4.17, 0.00, 0.00}, {-7.20, 1.50, -1.51, 5.70, 0.00}, {-0.65, -6.34, 2.67, 1.80, -7.10}}):t():clone() local e = torch.eig(a) local ee,vv = torch.eig(a,'V') local te = torch.Tensor() local tv = torch.Tensor() local eee,vvv = torch.eig(te,tv,a,'V') mytester:assertlt(maxdiff(e,ee),1e-12,'torch.eig value') mytester:assertlt(maxdiff(ee,eee),1e-12,'torch.eig value') mytester:assertlt(maxdiff(ee,te),1e-12,'torch.eig value') mytester:assertlt(maxdiff(vv,vvv),1e-12,'torch.eig value') mytester:assertlt(maxdiff(vv,tv),1e-12,'torch.eig value') end function torchtest.eig_reuse() if not torch.eig then return end local X = torch.randn(4,4) X = X:t()*X local e, v = torch.zeros(4,2), torch.zeros(4,4) torch.eig(e, v, X,'V') local Xhat = v * torch.diag(e:select(2,1)) * v:t() mytester:assertTensorEq(X, Xhat, 1e-8, 'VeV\' 
wrong') mytester:assert(not v:isContiguous(), 'V is contiguous') torch.eig(e, v, X, 'V') local Xhat = torch.mm(v, torch.mm(e:select(2,1):diag(), v:t())) mytester:assertTensorEq(X, Xhat, 1e-8, 'VeV\' wrong') mytester:assert(not v:isContiguous(), 'V is contiguous') end function torchtest.eig_noncontig() if not torch.eig then return end local X = torch.randn(4,4) X = X:t()*X local e = torch.zeros(4,2,2)[{ {}, 2, {} }] local v = torch.zeros(4,2,4)[{ {}, 2, {} }] mytester:assert(not v:isContiguous(), 'V is contiguous') mytester:assert(not e:isContiguous(), 'E is contiguous') torch.eig(e, v, X,'V') local Xhat = v * torch.diag(e:select(2,1)) * v:t() mytester:assertTensorEq(X, Xhat, 1e-8, 'VeV\' wrong') end function torchtest.test_symeig() if not torch.symeig then return end local xval = torch.rand(100,3) local cov = torch.mm(xval:t(), xval) local rese = torch.zeros(3) local resv = torch.zeros(3,3) -- First call to symeig mytester:assert(resv:isContiguous(), 'resv is not contiguous') -- PASS torch.symeig(rese, resv, cov:clone(), 'V') local ahat = resv*torch.diag(rese)*resv:t() mytester:assertTensorEq(cov, ahat, 1e-8, 'VeV\' wrong') -- PASS -- Second call to symeig mytester:assert(not resv:isContiguous(), 'resv is contiguous') -- FAIL torch.symeig(rese, resv, cov:clone(), 'V') local ahat = torch.mm(torch.mm(resv, torch.diag(rese)), resv:t()) mytester:assertTensorEq(cov, ahat, 1e-8, 'VeV\' wrong') -- FAIL end function torchtest.symeig_noncontig() if not torch.symeig then return end local X = torch.rand(5,5) X = X:t()*X local e = torch.zeros(4,2):select(2,2) local v = torch.zeros(4,2,4)[{ {}, 2, {} }] mytester:assert(not v:isContiguous(), 'V is contiguous') mytester:assert(not e:isContiguous(), 'E is contiguous') torch.symeig(e, v, X,'V') local Xhat = v * torch.diag(e) * v:t() mytester:assertTensorEq(X, Xhat, 1e-8, 'VeV\' wrong') end function torchtest.svd() if not torch.svd then return end local a=torch.Tensor({{8.79, 6.11, -9.15, 9.57, -3.49, 9.84}, {9.93, 6.91, -7.93, 
1.64, 4.02, 0.15}, {9.83, 5.04, 4.86, 8.83, 9.80, -8.99}, {5.45, -0.27, 4.85, 0.74, 10.00, -6.02}, {3.16, 7.98, 3.01, 5.80, 4.27, -5.31}}):t():clone() local u,s,v = torch.svd(a) local uu = torch.Tensor() local ss = torch.Tensor() local vv = torch.Tensor() local uuu,sss,vvv = torch.svd(uu,ss,vv,a) mytester:asserteq(maxdiff(u,uu),0,'torch.svd') mytester:asserteq(maxdiff(u,uuu),0,'torch.svd') mytester:asserteq(maxdiff(s,ss),0,'torch.svd') mytester:asserteq(maxdiff(s,sss),0,'torch.svd') mytester:asserteq(maxdiff(v,vv),0,'torch.svd') mytester:asserteq(maxdiff(v,vvv),0,'torch.svd') end function torchtest.svd_reuse() if not torch.svd then return end local X = torch.randn(4,4) local U, S, V = torch.svd(X) local Xhat = torch.mm(U, torch.mm(S:diag(), V:t())) mytester:assertTensorEq(X, Xhat, 1e-8, 'USV\' wrong') mytester:assert(not U:isContiguous(), 'U is contiguous') torch.svd(U, S, V, X) local Xhat = torch.mm(U, torch.mm(S:diag(), V:t())) mytester:assertTensorEq(X, Xhat, 1e-8, 'USV\' wrong') end function torchtest.svd_noncontig() if not torch.svd then return end local X = torch.randn(5,5) local U = torch.zeros(5,2,5)[{ {}, 2, {} }] local S = torch.zeros(5,2)[{ {}, 2 }] local V = torch.zeros(5,2,5)[{ {}, 2, {} }] mytester:assert(not U:isContiguous(), 'U is contiguous') mytester:assert(not S:isContiguous(), 'S is contiguous') mytester:assert(not V:isContiguous(), 'V is contiguous') torch.svd(U, S, V, X) local Xhat = torch.mm(U, torch.mm(S:diag(), V:t())) mytester:assertTensorEq(X, Xhat, 1e-8, 'USV\' wrong') end function torchtest.inverse() if not torch.inverse then return end local M = torch.randn(5,5) local MI = torch.inverse(M) local E = torch.eye(5) mytester:assert(not MI:isContiguous(), 'MI is contiguous') mytester:assertalmosteq(maxdiff(E,torch.mm(M,MI)), 0, 1e-8, 'inverse value') mytester:assertalmosteq(maxdiff(E,torch.mm(MI,M)), 0, 1e-8, 'inverse value') local MII = torch.Tensor(5,5) torch.inverse(MII, M) mytester:assert(not MII:isContiguous(), 'MII is contiguous') 
mytester:asserteq(maxdiff(MII, MI), 0, 'inverse value in-place') -- second call, now that MII is transposed torch.inverse(MII, M) mytester:assert(not MII:isContiguous(), 'MII is contiguous') mytester:asserteq(maxdiff(MII, MI), 0, 'inverse value in-place') end function torchtest.conv2() local x = torch.rand(math.floor(torch.uniform(50,100)),math.floor(torch.uniform(50,100))) local k = torch.rand(math.floor(torch.uniform(10,20)),math.floor(torch.uniform(10,20))) local imvc = torch.conv2(x,k) local imvc2 = torch.conv2(x,k,'V') local imfc = torch.conv2(x,k,'F') local ki = k:clone(); local ks = k:storage() local kis = ki:storage() for i=ks:size(),1,-1 do kis[ks:size()-i+1]=ks[i] end local imvx = torch.xcorr2(x,ki) local imvx2 = torch.xcorr2(x,ki,'V') local imfx = torch.xcorr2(x,ki,'F') mytester:asserteq(maxdiff(imvc,imvc2),0,'torch.conv2') mytester:asserteq(maxdiff(imvc,imvx),0,'torch.conv2') mytester:asserteq(maxdiff(imvc,imvx2),0,'torch.conv2') mytester:asserteq(maxdiff(imfc,imfx),0,'torch.conv2') mytester:assertlt(math.abs(x:dot(x)-torch.xcorr2(x,x)[1][1]),1e-10,'torch.conv2') local xx = torch.Tensor(2,x:size(1),x:size(2)) xx[1]:copy(x) xx[2]:copy(x) local kk = torch.Tensor(2,k:size(1),k:size(2)) kk[1]:copy(k) kk[2]:copy(k) local immvc = torch.conv2(xx,kk) local immvc2 = torch.conv2(xx,kk,'V') local immfc = torch.conv2(xx,kk,'F') mytester:asserteq(maxdiff(immvc[1],immvc[2]),0,'torch.conv2') mytester:asserteq(maxdiff(immvc[1],imvc),0,'torch.conv2') mytester:asserteq(maxdiff(immvc2[1],imvc2),0,'torch.conv2') mytester:asserteq(maxdiff(immfc[1],immfc[2]),0,'torch.conv2') mytester:asserteq(maxdiff(immfc[1],imfc),0,'torch.conv2') end function torchtest.conv3() local x = torch.rand(math.floor(torch.uniform(20,40)), math.floor(torch.uniform(20,40)), math.floor(torch.uniform(20,40))) local k = torch.rand(math.floor(torch.uniform(5,10)), math.floor(torch.uniform(5,10)), math.floor(torch.uniform(5,10))) local imvc = torch.conv3(x,k) local imvc2 = torch.conv3(x,k,'V') local imfc 
= torch.conv3(x,k,'F') local ki = k:clone(); local ks = k:storage() local kis = ki:storage() for i=ks:size(),1,-1 do kis[ks:size()-i+1]=ks[i] end local imvx = torch.xcorr3(x,ki) local imvx2 = torch.xcorr3(x,ki,'V') local imfx = torch.xcorr3(x,ki,'F') mytester:asserteq(maxdiff(imvc,imvc2),0,'torch.conv3') mytester:asserteq(maxdiff(imvc,imvx),0,'torch.conv3') mytester:asserteq(maxdiff(imvc,imvx2),0,'torch.conv3') mytester:asserteq(maxdiff(imfc,imfx),0,'torch.conv3') mytester:assertlt(math.abs(x:dot(x)-torch.xcorr3(x,x)[1][1][1]),4*1e-10,'torch.conv3') local xx = torch.Tensor(2,x:size(1),x:size(2),x:size(3)) xx[1]:copy(x) xx[2]:copy(x) local kk = torch.Tensor(2,k:size(1),k:size(2),k:size(3)) kk[1]:copy(k) kk[2]:copy(k) local immvc = torch.conv3(xx,kk) local immvc2 = torch.conv3(xx,kk,'V') local immfc = torch.conv3(xx,kk,'F') mytester:asserteq(maxdiff(immvc[1],immvc[2]),0,'torch.conv3') mytester:asserteq(maxdiff(immvc[1],imvc),0,'torch.conv3') mytester:asserteq(maxdiff(immvc2[1],imvc2),0,'torch.conv3') mytester:asserteq(maxdiff(immfc[1],immfc[2]),0,'torch.conv3') mytester:asserteq(maxdiff(immfc[1],imfc),0,'torch.conv3') end function torchtest.xcorr3_xcorr2_eq() local ix = math.floor(torch.uniform(20,40)) local iy = math.floor(torch.uniform(20,40)) local iz = math.floor(torch.uniform(20,40)) local kx = math.floor(torch.uniform(5,10)) local ky = math.floor(torch.uniform(5,10)) local kz = math.floor(torch.uniform(5,10)) local x = torch.rand(ix,iy,iz) local k = torch.rand(kx,ky,kz) local o3 = torch.xcorr3(x,k) local o32 = torch.zeros(o3:size()) for i=1,o3:size(1) do for j=1,k:size(1) do o32[i]:add(torch.xcorr2(x[i+j-1],k[j])) end end mytester:assertlt(maxdiff(o3,o32),precision,'torch.conv3_conv2_eq') end function torchtest.fxcorr3_fxcorr2_eq() local ix = math.floor(torch.uniform(20,40)) local iy = math.floor(torch.uniform(20,40)) local iz = math.floor(torch.uniform(20,40)) local kx = math.floor(torch.uniform(5,10)) local ky = math.floor(torch.uniform(5,10)) local kz = 
math.floor(torch.uniform(5,10)) local x = torch.rand(ix,iy,iz) local k = torch.rand(kx,ky,kz) local o3 = torch.xcorr3(x,k,'F') local o32 = torch.zeros(o3:size()) for i=1,x:size(1) do for j=1,k:size(1) do o32[i+j-1]:add(torch.xcorr2(x[i],k[k:size(1)-j + 1],'F')) end end mytester:assertlt(maxdiff(o3,o32),precision,'torch.conv3_conv2_eq') end function torchtest.conv3_conv2_eq() local ix = math.floor(torch.uniform(20,40)) local iy = math.floor(torch.uniform(20,40)) local iz = math.floor(torch.uniform(20,40)) local kx = math.floor(torch.uniform(5,10)) local ky = math.floor(torch.uniform(5,10)) local kz = math.floor(torch.uniform(5,10)) local x = torch.rand(ix,iy,iz) local k = torch.rand(kx,ky,kz) local o3 = torch.conv3(x,k) local o32 = torch.zeros(o3:size()) for i=1,o3:size(1) do for j=1,k:size(1) do o32[i]:add(torch.conv2(x[i+j-1],k[k:size(1)-j+1])) end end mytester:assertlt(maxdiff(o3,o32),precision,'torch.conv3_conv2_eq') end function torchtest.fconv3_fconv2_eq() local ix = math.floor(torch.uniform(20,40)) local iy = math.floor(torch.uniform(20,40)) local iz = math.floor(torch.uniform(20,40)) local kx = math.floor(torch.uniform(5,10)) local ky = math.floor(torch.uniform(5,10)) local kz = math.floor(torch.uniform(5,10)) local x = torch.rand(ix,iy,iz) local k = torch.rand(kx,ky,kz) local o3 = torch.conv3(x,k,'F') local o32 = torch.zeros(o3:size()) for i=1,x:size(1) do for j=1,k:size(1) do o32[i+j-1]:add(torch.conv2(x[i],k[j],'F')) end end mytester:assertlt(maxdiff(o3,o32),precision,'torch.conv3_conv2_eq') end function torchtest.logical() local x = torch.rand(100,100)*2-1; local xx = x:clone() local xgt = torch.gt(x,1) local xlt = torch.lt(x,1) local xeq = torch.eq(x,1) local xne = torch.ne(x,1) local neqs = xgt+xlt local all = neqs + xeq mytester:asserteq(neqs:sum(), xne:sum(), 'torch.logical') mytester:asserteq(x:nElement(),all:double():sum() , 'torch.logical') end function torchtest.RNGState() local state = torch.getRNGState() local stateCloned = state:clone() local 
before = torch.rand(1000) mytester:assert(state:ne(stateCloned):long():sum() == 0, 'getRNGState should have value semantics, but appears to have reference semantics') torch.setRNGState(state) local after = torch.rand(1000) mytester:assertTensorEq(before, after, 1e-16, 'getRNGState/setRNGState not generating same sequence') end function torchtest.RNGStateAliasing() torch.manualSeed(1) local unused = torch.uniform() -- Fork the random number stream at this point local gen = torch.Generator() torch.setRNGState(gen, torch.getRNGState()) local target_value = torch.rand(1000) --Dramatically alter the internal state of the main generator local also_unused = torch.rand(100000) local forked_value = torch.rand(gen, 1000) mytester:assertTensorEq(target_value, forked_value, 1e-16, "RNG has not forked correctly.") end function torchtest.serializeGenerator() local generator = torch.Generator() torch.manualSeed(generator, 123) local differentGenerator = torch.Generator() torch.manualSeed(differentGenerator, 124) local serializedGenerator = torch.serialize(generator) local deserializedGenerator = torch.deserialize(serializedGenerator) local generated = torch.random(generator) local differentGenerated = torch.random(differentGenerator) local deserializedGenerated = torch.random(deserializedGenerator) mytester:asserteq(generated, deserializedGenerated, 'torch.Generator changed internal state after being serialized') mytester:assertne(generated, differentGenerated, 'Generators with different random seed should not produce the same output') end function torchtest.testBoxMullerState() torch.manualSeed(123) local odd_number = 101 local seeded = torch.randn(odd_number) local state = torch.getRNGState() local midstream = torch.randn(odd_number) torch.setRNGState(state) local repeat_midstream = torch.randn(odd_number) torch.manualSeed(123) local reseeded = torch.randn(odd_number) mytester:assertTensorEq(midstream, repeat_midstream, 1e-16, 'getRNGState/setRNGState not generating same 
sequence of normally distributed numbers') mytester:assertTensorEq(seeded, reseeded, 1e-16, 'repeated calls to manualSeed not generating same sequence of normally distributed numbers') end function torchtest.testCholesky() local x = torch.rand(10,10) local A = torch.mm(x, x:t()) ---- Default Case local C = torch.potrf(A) local B = torch.mm(C:t(), C) mytester:assertTensorEq(A, B, 1e-14, 'potrf did not allow rebuilding the original matrix') ---- Test Upper Triangular local U = torch.potrf(A, 'U') B = torch.mm(U:t(), U) mytester:assertTensorEq(A, B, 1e-14, 'potrf (upper) did not allow rebuilding the original matrix') ---- Test Lower Triangular local L = torch.potrf(A, 'L') B = torch.mm(L, L:t()) mytester:assertTensorEq(A, B, 1e-14, 'potrf (lower) did not allow rebuilding the original matrix') end function torchtest.potrs() if not torch.potrs then return end local a=torch.Tensor({{6.80, -2.11, 5.66, 5.97, 8.23}, {-6.05, -3.30, 5.36, -4.44, 1.08}, {-0.45, 2.58, -2.70, 0.27, 9.04}, {8.32, 2.71, 4.35, -7.17, 2.14}, {-9.67, -5.14, -7.26, 6.08, -6.87}}):t() local b=torch.Tensor({{4.02, 6.19, -8.22, -7.57, -3.03}, {-1.56, 4.00, -8.67, 1.75, 2.86}, {9.81, -4.09, -4.57, -8.61, 8.99}}):t() ---- Make sure 'a' is symmetric PSD a = torch.mm(a, a:t()) ---- Upper Triangular Test local U = torch.potrf(a, 'U') local x = torch.potrs(b, U, 'U') mytester:assertlt(b:dist(a*x),1e-12,"torch.potrs; uplo='U'") ---- Lower Triangular Test local L = torch.potrf(a, 'L') x = torch.potrs(b, L, 'L') mytester:assertlt(b:dist(a*x),1e-12,"torch.potrs; uplo='L") end function torchtest.potri() if not torch.potrs then return end local a=torch.Tensor({{6.80, -2.11, 5.66, 5.97, 8.23}, {-6.05, -3.30, 5.36, -4.44, 1.08}, {-0.45, 2.58, -2.70, 0.27, 9.04}, {8.32, 2.71, 4.35, -7.17, 2.14}, {-9.67, -5.14, -7.26, 6.08, -6.87}}):t() ---- Make sure 'a' is symmetric PSD a = torch.mm(a, a:t()) ---- Compute inverse directly local inv0 = torch.inverse(a) ---- Default case local chol = torch.potrf(a) local inv1 = 
torch.potri(chol) mytester:assertlt(inv0:dist(inv1),1e-12,"torch.potri; uplo=''") ---- Upper Triangular Test chol = torch.potrf(a, 'U') inv1 = torch.potri(chol, 'U') mytester:assertlt(inv0:dist(inv1),1e-12,"torch.potri; uplo='U'") ---- Lower Triangular Test chol = torch.potrf(a, 'L') inv1 = torch.potri(chol, 'L') mytester:assertlt(inv0:dist(inv1),1e-12,"torch.potri; uplo='L'") end function torchtest.pstrf() local function checkPsdCholesky(a, uplo, inplace) local u, piv, args, a_reconstructed if inplace then u = torch.Tensor(a:size()) piv = torch.IntTensor(a:size(1)) args = {u, piv, a} else args = {a} end if uplo then table.insert(args, uplo) end u, piv = torch.pstrf(unpack(args)) if uplo == 'L' then a_reconstructed = torch.mm(u, u:t()) else a_reconstructed = torch.mm(u:t(), u) end piv = piv:long() local a_permuted = a:index(1, piv):index(2, piv) mytester:assertTensorEq(a_permuted, a_reconstructed, 1e-14, 'torch.pstrf did not allow rebuilding the original matrix;' .. 'uplo=' .. tostring(uplo)) end local dimensions = { {5, 1}, {5, 3}, {5, 5}, {10, 10} } for _, dim in pairs(dimensions) do local m = torch.Tensor(unpack(dim)):uniform() local a = torch.mm(m, m:t()) -- add a small number to the diagonal to make the matrix numerically positive semidefinite for i = 1, m:size(1) do a[i][i] = a[i][i] + 1e-7 end checkPsdCholesky(a, nil, false) checkPsdCholesky(a, 'U', false) checkPsdCholesky(a, 'L', false) checkPsdCholesky(a, nil, true) checkPsdCholesky(a, 'U', true) checkPsdCholesky(a, 'L', true) end end function torchtest.testNumel() local b = torch.ByteTensor(3, 100, 100) mytester:asserteq(b:nElement(), 3*100*100, "nElement not right") mytester:asserteq(b:numel(), 3*100*100, "numel not right") end -- Generate a tensor of size `size` whose values are ascending integers from -- `start` (or 1, if `start is not given) local function consecutive(size, start) local sequence = torch.ones(torch.Tensor(size):prod(1)[1]):cumsum(1) if start then sequence:add(start - 1) end return 
sequence:resize(unpack(size))
end

-- __index (read) access: plain integers, table indices, ranged tables, and the
-- empty table must all return the expected sub-tensors; over-indexing must error.
function torchtest.index()
   local badIndexMsg = "Lookup with valid index should return correct result"
   local reference = consecutive{3, 3, 3}
   -- single-dimension lookups, both reference[i] and reference[{i}] forms
   mytester:assertTensorEq(reference[1], consecutive{3, 3}, 1e-16, badIndexMsg)
   mytester:assertTensorEq(reference[2], consecutive({3, 3}, 10), 1e-16, badIndexMsg)
   mytester:assertTensorEq(reference[3], consecutive({3, 3}, 19), 1e-16, badIndexMsg)
   mytester:assertTensorEq(reference[{1}], consecutive{3, 3}, 1e-16, badIndexMsg)
   mytester:assertTensorEq(reference[{2}], consecutive({3, 3}, 10), 1e-16, badIndexMsg)
   mytester:assertTensorEq(reference[{3}], consecutive({3, 3}, 19), 1e-16, badIndexMsg)
   -- multi-dimensional and ranged lookups
   mytester:assertTensorEq(reference[{1,2}], consecutive({3}, 4), 1e-16, badIndexMsg)
   mytester:assertTensorEq(reference[{{1,2}}], consecutive({2, 3, 3}), 1e-16, badIndexMsg)
   mytester:asserteq(reference[{3, 3, 3}], 27, badIndexMsg)
   -- empty index table selects the whole tensor
   mytester:assertTensorEq(reference[{}], consecutive{3, 3, 3}, 1e-16, badIndexMsg)
   local shouldErrorMsg = "Lookup with too many indices should error"
   mytester:assertError(function() return reference[{1, 1, 1, 1}] end, shouldErrorMsg)
   mytester:assertError(function() return reference[{1, 1, 1, {1, 1}}] end, shouldErrorMsg)
   mytester:assertError(function() return reference[{3, 3, 3, 3, 3, 3, 3, 3}] end, shouldErrorMsg)
end

-- __newindex (write) access: assigning a tensor or scalar through every index
-- form must hit exactly the selected elements; over-indexing must error.
function torchtest.newIndex()
   local badIndexMsg = "Assignment to valid index should produce correct result"
   local reference = consecutive{3, 3, 3}
   -- This relies on __index__() being correct - but we have separate tests for that
   local function checkPartialAssign(index)
      local reference = torch.zeros(3, 3, 3)
      -- assign the selected slice, check it landed, then zero it back out
      reference[index] = consecutive{3, 3, 3}[index]
      mytester:assertTensorEq(reference[index], consecutive{3, 3, 3}[index], 1e-16, badIndexMsg)
      reference[index] = 0
      mytester:assertTensorEq(reference, torch.zeros(3, 3, 3), 1e-16, badIndexMsg)
   end
   checkPartialAssign{1}
   checkPartialAssign{2}
   checkPartialAssign{3}
   checkPartialAssign{1,2}
   checkPartialAssign{2,3}
   checkPartialAssign{1,3}
   checkPartialAssign{}
   local shouldErrorMsg = "Assignment with too many indices should error"
   mytester:assertError(function() reference[{1, 1, 1, 1}] = 1 end, shouldErrorMsg)
   mytester:assertError(function() reference[{1, 1, 1, {1, 1}}] = 1 end, shouldErrorMsg)
   mytester:assertError(function() reference[{3, 3, 3, 3, 3, 3, 3, 3}] = 1 end, shouldErrorMsg)
end

-- indexCopy must match an element-by-element copy through a random permutation
-- of row indices, for both multi-dim (tensor rows) and 1-D (scalars) cases.
function torchtest.indexCopy()
   local nCopy, nDest = 3, 20
   local dest = torch.randn(nDest,4,5)
   local src = torch.randn(nCopy,4,5)
   local idx = torch.randperm(nDest):narrow(1, 1, nCopy):long()
   local dest2 = dest:clone()
   dest:indexCopy(1, idx, src)
   -- reference implementation: copy row-by-row
   for i=1,idx:size(1) do
      dest2[idx[i]]:copy(src[i])
   end
   mytester:assertTensorEq(dest, dest2, 0.000001, "indexCopy tensor error")

   local dest = torch.randn(nDest)
   local src = torch.randn(nCopy)
   local idx = torch.randperm(nDest):narrow(1, 1, nCopy):long()
   local dest2 = dest:clone()
   dest:indexCopy(1, idx, src)
   for i=1,idx:size(1) do
      dest2[idx[i]] = src[i]
   end
   mytester:assertTensorEq(dest, dest2, 0.000001, "indexCopy scalar error")
end

-- indexAdd must match an element-by-element accumulate through a random
-- permutation of row indices, for both multi-dim and 1-D cases.
function torchtest.indexAdd()
   local nCopy, nDest = 3, 20
   local dest = torch.randn(nDest,4,5)
   local src = torch.randn(nCopy,4,5)
   local idx = torch.randperm(nDest):narrow(1, 1, nCopy):long()
   local dest2 = dest:clone()
   dest:indexAdd(1, idx, src)
   -- reference implementation: accumulate row-by-row
   for i=1,idx:size(1) do
      dest2[idx[i]]:add(src[i])
   end
   mytester:assertTensorEq(dest, dest2, 0.000001, "indexAdd tensor error")

   local dest = torch.randn(nDest)
   local src = torch.randn(nCopy)
   local idx = torch.randperm(nDest):narrow(1, 1, nCopy):long()
   local dest2 = dest:clone()
   dest:indexAdd(1, idx, src)
   for i=1,idx:size(1) do
      dest2[idx[i]] = dest2[idx[i]] + src[i]
   end
   mytester:assertTensorEq(dest, dest2, 0.000001, "indexAdd scalar error")
end

-- Fill idx with valid indices.
-- Helper: fill `idx` with valid random indices along dimension `dim`, leaving
-- the other two dimensions enumerated. Used by the gather/scatter tests below.
local function fillIdx(idx, dim, dim_size, elems_per_row, m, n, o)
   for i = 1, (dim == 1 and 1 or m) do
      for j = 1, (dim == 2 and 1 or n) do
         for k = 1, (dim == 3 and 1 or o) do
            local ii = {i, j, k}
            ii[dim] = {}
            idx[ii] = torch.randperm(dim_size)[{{1, elems_per_row}}]
         end
      end
   end
end

-- gather must agree with an explicit triple loop over the index tensor, and
-- must error on an out-of-range index.
function torchtest.gather()
   local m, n, o = torch.random(10, 20), torch.random(10, 20), torch.random(10, 20)
   local elems_per_row = torch.random(10)
   local dim = torch.random(3)

   local src = torch.randn(m, n, o)
   local idx_size = {m, n, o}
   idx_size[dim] = elems_per_row
   local idx = torch.LongTensor():resize(unpack(idx_size))
   fillIdx(idx, dim, src:size(dim), elems_per_row, m, n, o)

   local actual = torch.gather(src, dim, idx)
   local expected = torch.Tensor():resize(unpack(idx_size))
   -- reference implementation: expected[i][j][k] = src with dim replaced by idx
   for i = 1, idx_size[1] do
      for j = 1, idx_size[2] do
         for k = 1, idx_size[3] do
            local ii = {i, j, k}
            ii[dim] = idx[i][j][k]
            expected[i][j][k] = src[ii]
         end
      end
   end
   mytester:assertTensorEq(actual, expected, 0, "Wrong values for gather")

   -- 23 is out of range for every dimension (max size is 20)
   idx[1][1][1] = 23
   mytester:assertError(function() torch.gather(src, dim, idx) end,
                        "Invalid index not detected")
end

-- gather with the index tensor produced by max() must reproduce the max values.
function torchtest.gatherMax()
   local src = torch.randn(3, 4, 5)
   local expected, idx = src:max(3)
   local actual = torch.gather(src, 3, idx)
   mytester:assertTensorEq(actual, expected, 0, "Wrong values for gather")
end

-- scatter of a source tensor must agree with an explicit triple loop, and must
-- error on an out-of-range index.
function torchtest.scatter()
   local m, n, o = torch.random(10, 20), torch.random(10, 20), torch.random(10, 20)
   local elems_per_row = torch.random(10)
   local dim = torch.random(3)

   local idx_size = {m, n, o}
   idx_size[dim] = elems_per_row
   local idx = torch.LongTensor():resize(unpack(idx_size))
   fillIdx(idx, dim, ({m, n, o})[dim], elems_per_row, m, n, o)
   local src = torch.Tensor():resize(unpack(idx_size)):normal()

   local actual = torch.zeros(m, n, o):scatter(dim, idx, src)
   local expected = torch.zeros(m, n, o)
   -- reference implementation: expected with dim replaced by idx gets src
   for i = 1, idx_size[1] do
      for j = 1, idx_size[2] do
         for k = 1, idx_size[3] do
            local ii = {i, j, k}
            ii[dim] = idx[i][j][k]
            expected[ii] = src[i][j][k]
         end
      end
   end
   mytester:assertTensorEq(actual, expected, 0, "Wrong values for scatter")

   -- 34 is out of range for every dimension (max size is 20)
   idx[1][1][1] = 34
   mytester:assertError(function() torch.zeros(m, n, o):scatter(dim, idx, src) end,
                        "Invalid index not detected")
end

-- scatter of a scalar fill value, checked the same way as torchtest.scatter.
function torchtest.scatterFill()
   local m, n, o = torch.random(10, 20), torch.random(10, 20), torch.random(10, 20)
   local elems_per_row = torch.random(10)
   local dim = torch.random(3)

   local val = torch.uniform()
   local idx_size = {m, n, o}
   idx_size[dim] = elems_per_row
   local idx = torch.LongTensor():resize(unpack(idx_size))
   fillIdx(idx, dim, ({m, n, o})[dim], elems_per_row, m, n, o)

   local actual = torch.zeros(m, n, o):scatter(dim, idx, val)
   local expected = torch.zeros(m, n, o)
   for i = 1, idx_size[1] do
      for j = 1, idx_size[2] do
         for k = 1, idx_size[3] do
            local ii = {i, j, k}
            ii[dim] = idx[i][j][k]
            expected[ii] = val
         end
      end
   end
   mytester:assertTensorEq(actual, expected, 0, "Wrong values for scatter")

   idx[1][1][1] = 28
   mytester:assertError(function() torch.zeros(m, n, o):scatter(dim, idx, val) end,
                        "Invalid index not detected")
end

-- maskedCopy must copy src values into the mask's 1-positions in order; a src
-- larger than the mask's one-count is accepted, a smaller one must error.
function torchtest.maskedCopy()
   local nCopy, nDest = 3, 10
   local dest = torch.randn(nDest)
   local src = torch.randn(nCopy)
   local mask = torch.ByteTensor{0,0,0,0,1,0,1,0,1,0}
   local dest2 = dest:clone()
   dest:maskedCopy(mask, src)
   -- reference implementation: walk src in order over mask's 1-positions
   local j = 1
   for i=1,nDest do
      if mask[i] == 1 then
         dest2[i] = src[j]
         j = j + 1
      end
   end
   mytester:assertTensorEq(dest, dest2, 0.000001, "maskedCopy error")

   -- make source bigger than number of 1s in mask
   src = torch.randn(nDest)
   local ok = pcall(dest.maskedCopy, dest, mask, src)
   mytester:assert(ok, "maskedCopy incorrect complaint when" ..
      " src is bigger than mask's one count")

   src = torch.randn(nCopy - 1) -- make src smaller. this should fail
   local ok = pcall(dest.maskedCopy, dest, mask, src)
   mytester:assert(not ok, "maskedCopy not erroring when" ..
      " src is smaller than mask's one count")
end

-- maskedSelect must extract exactly the 1-masked elements, in order.
function torchtest.maskedSelect()
   local nSrc = 10
   local src = torch.randn(nSrc)
   local mask = torch.rand(nSrc):mul(2):floor():byte()  -- random 0/1 mask
   local dst = torch.Tensor()
   dst:maskedSelect(src, mask)
   local dst2 = {}
   for i=1,nSrc do
      if mask[i] == 1 then
         table.insert(dst2, src[i])
      end
   end
   mytester:assertTensorEq(dst, torch.DoubleTensor(dst2), 0.000001, "maskedSelect error")
end

-- maskedFill must overwrite exactly the 1-masked elements with the fill value.
function torchtest.maskedFill()
   local nDst = 10
   local dst = torch.randn(nDst)
   local mask = torch.rand(nDst):mul(2):floor():byte()  -- random 0/1 mask
   local val = math.random()
   local dst2 = dst:clone()
   dst:maskedFill(mask, val)
   for i=1,nDst do
      if mask[i] == 1 then
         dst2[i] = val
      end
   end
   mytester:assertTensorEq(dst, dst2, 0.000001, "maskedFill error")
end

-- abs() must undo a random sign flip for each numeric tensor type, and the
-- 64-bit (or 32-bit) LongTensor path must use a wide-enough abs.
function torchtest.abs()
   local size = 1000
   local range = 1000
   local original = torch.rand(size):mul(range)
   -- Tensor filled with {-1,1}
   local switch = torch.rand(size):mul(2):floor():mul(2):add(-1)

   local types = {'torch.DoubleTensor', 'torch.FloatTensor', 'torch.LongTensor', 'torch.IntTensor'}
   for k,t in ipairs(types) do
      local data = original:type(t)
      local switch = switch:type(t)
      local input = torch.cmul(data, switch)
      mytester:assertTensorEq(input:abs(), data, 1e-16, 'Error in abs() for '..t)
   end

   -- Checking that the right abs function is called for LongTensor
   local bignumber
   if torch.LongTensor():elementSize() > 4 then
      bignumber = 2^31 + 1  -- overflows a 32-bit abs
   else
      bignumber = 2^15 + 1
   end
   local input = torch.LongTensor{-bignumber}
   mytester:assertgt(input:abs()[1], 0, 'torch.abs(3)')
end

-- torch.class with a dotted name must create the class inside a module table.
function torchtest.classInModule()
   -- Need a global for this module
   _mymodule123 = {}
   local x = torch.class('_mymodule123.myclass')
   mytester:assert(x ~= nil, 'Could not create class in module')
   -- Remove the global
   _G['_mymodule123'] = nil
   debug.getregistry()['_mymodule123.myclass']=nil
end

-- torch.class with a plain name must create a top-level class.
function torchtest.classNoModule()
   local x = torch.class('_myclass123')
   mytester:assert(x ~= nil, 'Could not create class in module')
   debug.getregistry()['_myclass123'] = nil
end
function torchtest.type() local objects = {torch.DoubleTensor(), {}, nil, 2, "asdf"} local types = {'torch.DoubleTensor', 'table', 'nil', 'number', 'string'} for i,obj in ipairs(objects) do mytester:assert(torch.type(obj) == types[i], "wrong type "..types[i]) end end function torchtest.isTypeOfInheritance() do local A = torch.class('A') local B, parB = torch.class('B', 'A') local C, parC = torch.class('C', 'A') end local a, b, c = A(), B(), C() mytester:assert(torch.isTypeOf(a, 'A'), 'isTypeOf error, string spec') mytester:assert(torch.isTypeOf(a, A), 'isTypeOf error, constructor') mytester:assert(torch.isTypeOf(b, 'B'), 'isTypeOf error child class') mytester:assert(torch.isTypeOf(b, B), 'isTypeOf error child class ctor') mytester:assert(torch.isTypeOf(b, 'A'), 'isTypeOf error: inheritance') mytester:assert(torch.isTypeOf(b, A), 'isTypeOf error: inheritance') mytester:assert(not torch.isTypeOf(c, 'B'), 'isTypeOf error: common parent') mytester:assert(not torch.isTypeOf(c, B), 'isTypeOf error: common parent') debug.getregistry()['A'] = nil debug.getregistry()['B'] = nil debug.getregistry()['C'] = nil end function torchtest.isTypeOfPartial() do local TorchDummy = torch.class('TorchDummy') local OtherTorchDummy = torch.class('OtherTorchDummy') local TorchMember = torch.class('TorchMember') local OtherTorchMember = torch.class('OtherTorchMember') local FirstTorchMember = torch.class('FirstTorchMember', 'TorchMember') local SecondTorchMember = torch.class('SecondTorchMember', 'TorchMember') local ThirdTorchMember = torch.class('ThirdTorchMember', 'OtherTorchMember') end local td, otd = TorchDummy(), OtherTorchDummy() local tm, ftm, stm, ttm = TorchMember(), FirstTorchMember(), SecondTorchMember(), ThirdTorchMember() mytester:assert(not torch.isTypeOf(td, 'OtherTorchDummy'), 'isTypeOf error: incorrect partial match') mytester:assert(not torch.isTypeOf(otd, 'TorchDummy'), 'isTypeOf error: incorrect partial match') mytester:assert(torch.isTypeOf(tm, 'TorchMember'), 
'isTypeOf error, string spec') mytester:assert(torch.isTypeOf(tm, TorchMember), 'isTypeOf error, constructor') mytester:assert(torch.isTypeOf(ftm, 'FirstTorchMember'), 'isTypeOf error child class') mytester:assert(torch.isTypeOf(ftm, FirstTorchMember), 'isTypeOf error child class ctor') mytester:assert(torch.isTypeOf(ftm, 'TorchMember'), 'isTypeOf error: inheritance') mytester:assert(torch.isTypeOf(ftm, TorchMember), 'isTypeOf error: inheritance') mytester:assert(not torch.isTypeOf(stm, 'FirstTorchMember'), 'isTypeOf error: common parent') mytester:assert(not torch.isTypeOf(stm, FirstTorchMember), 'isTypeOf error: common parent') mytester:assert(not torch.isTypeOf(ttm, TorchMember), 'isTypeOf error: inheritance') mytester:assert(not torch.isTypeOf(ttm, 'TorchMember'), 'isTypeOf error: inheritance') debug.getregistry()['TorchDummy'] = nil debug.getregistry()['OtherTorchDummy'] = nil debug.getregistry()['TorchMember'] = nil debug.getregistry()['OtherTorchMember'] = nil debug.getregistry()['FirstTorchMember'] = nil debug.getregistry()['SecondTorchMember'] = nil debug.getregistry()['ThirdTorchMember'] = nil end function torchtest.isTypeOfPattern() local t = torch.LongTensor() mytester:assert(torch.isTypeOf(t, torch.LongTensor), 'isTypeOf error: incorrect match') mytester:assert(not torch.isTypeOf(t, torch.IntTensor), 'isTypeOf error: incorrect match') mytester:assert(torch.isTypeOf(t, 'torch.LongTensor'), 'isTypeOf error: incorrect match') mytester:assert(not torch.isTypeOf(t, 'torch.Long'), 'isTypeOf error: incorrect match') mytester:assert(torch.isTypeOf(t, 'torch.*Tensor'), 'isTypeOf error: incorrect match') mytester:assert(torch.isTypeOf(t, '.*Long'), 'isTypeOf error: incorrect match') mytester:assert(not torch.isTypeOf(t, 'torch.IntTensor'), 'isTypeOf error: incorrect match') end function torchtest.isTensor() for k,v in ipairs({"real", "half"}) do torchtest_isTensor(torch.getmetatable(torch.Tensor():type())[v]) end end function torchtest_isTensor(func) local t = 
func(torch.randn(3,4)) mytester:assert(torch.isTensor(t), 'error in isTensor') mytester:assert(torch.isTensor(t[1]), 'error in isTensor for subTensor') mytester:assert(not torch.isTensor(t[1][2]), 'false positive in isTensor') mytester:assert(torch.Tensor.isTensor(t), 'alias not working') end function torchtest.isStorage() for k,v in ipairs({"real", "half"}) do torchtest_isStorage(torch.getmetatable(torch.Tensor():type())[v]) end end function torchtest_isStorage(func) local t = torch.randn(3,4) mytester:assert(torch.isStorage(t:storage()), 'error in isStorage') mytester:assert(not torch.isStorage(t), 'false positive in isStorage') end function torchtest.view() for k,v in ipairs({"real", "half"}) do torchtest_view(torch.getmetatable(torch.Tensor():type())[v]) end end function torchtest_view(func) local tensor = func(torch.rand(15)) local template = func(torch.rand(3,5)) local target = template:size():totable() mytester:assertTableEq(tensor:viewAs(template):size():totable(), target, 'Error in viewAs') mytester:assertTableEq(tensor:view(3,5):size():totable(), target, 'Error in view') mytester:assertTableEq(tensor:view(torch.LongStorage{3,5}):size():totable(), target, 'Error in view using LongStorage') mytester:assertTableEq(tensor:view(-1,5):size():totable(), target, 'Error in view using dimension -1') mytester:assertTableEq(tensor:view(3,-1):size():totable(), target, 'Error in view using dimension -1') local tensor_view = tensor:view(5,3) tensor_view:fill(torch.rand(1)[1]) mytester:asserteq((tensor_view-tensor):abs():max(), 0, 'Error in view') local target_tensor = func(torch.Tensor()) mytester:assertTableEq(target_tensor:viewAs(tensor, template):size():totable(), target, 'Error in viewAs') mytester:assertTableEq(target_tensor:view(tensor, 3,5):size():totable(), target, 'Error in view') mytester:assertTableEq(target_tensor:view(tensor, torch.LongStorage{3,5}):size():totable(), target, 'Error in view using LongStorage') 
mytester:assertTableEq(target_tensor:view(tensor, -1,5):size():totable(), target, 'Error in view using dimension -1') mytester:assertTableEq(target_tensor:view(tensor, 3,-1):size():totable(), target, 'Error in view using dimension -1') target_tensor:fill(torch.rand(1)[1]) mytester:asserteq((target_tensor-tensor):abs():max(), 0, 'Error in viewAs') end function torchtest.expand() for k,v in ipairs({"real", "half"}) do torchtest_expand(torch.getmetatable(torch.Tensor():type())[v]) end end function torchtest_expand(func) local result = func(torch.Tensor()) local tensor = func(torch.rand(8,1)) local template = func(torch.rand(8,5)) local target = template:size():totable() mytester:assertTableEq(tensor:expandAs(template):size():totable(), target, 'Error in expandAs') mytester:assertTableEq(tensor:expand(8,5):size():totable(), target, 'Error in expand') mytester:assertTableEq(tensor:expand(torch.LongStorage{8,5}):size():totable(), target, 'Error in expand using LongStorage') result:expandAs(tensor,template) mytester:assertTableEq(result:size():totable(), target, 'Error in expandAs using result') result:expand(tensor,8,5) mytester:assertTableEq(result:size():totable(), target, 'Error in expand using result') result:expand(tensor,torch.LongStorage{8,5}) mytester:assertTableEq(result:size():totable(), target, 'Error in expand using result and LongStorage') mytester:asserteq((result:mean(2):view(8,1)-tensor):abs():max(), 0, 'Error in expand (not equal)') end function torchtest.repeatTensor() for k,v in ipairs({"real", "half"}) do torchtest_repeatTensor(torch.getmetatable(torch.Tensor():type())[v]) end end function torchtest_repeatTensor(func, mean) local result = func(torch.Tensor()) local tensor = func(torch.rand(8,4)) local size = {3,1,1} local sizeStorage = torch.LongStorage(size) local target = {3,8,4} mytester:assertTableEq(tensor:repeatTensor(unpack(size)):size():totable(), target, 'Error in repeatTensor') 
mytester:assertTableEq(tensor:repeatTensor(sizeStorage):size():totable(), target, 'Error in repeatTensor using LongStorage') result:repeatTensor(tensor,unpack(size)) mytester:assertTableEq(result:size():totable(), target, 'Error in repeatTensor using result') result:repeatTensor(tensor,sizeStorage) mytester:assertTableEq(result:size():totable(), target, 'Error in repeatTensor using result and LongStorage') mytester:asserteq((result:mean(1):view(8,4)-tensor):abs():max(), 0, 'Error in repeatTensor (not equal)') end function torchtest.isSameSizeAs() for k,v in ipairs({"real", "half"}) do torchtest_isSameSizeAs(torch.getmetatable(torch.Tensor():type())[v]) end end function torchtest_isSameSizeAs(func) local t1 = func(torch.Tensor(3, 4, 9, 10)) local t2 = func(torch.Tensor(3, 4)) local t3 = func(torch.Tensor(1, 9, 3, 3)) local t4 = func(torch.Tensor(3, 4, 9, 10)) mytester:assert(t1:isSameSizeAs(t2) == false, "wrong answer ") mytester:assert(t1:isSameSizeAs(t3) == false, "wrong answer ") mytester:assert(t1:isSameSizeAs(t4) == true, "wrong answer ") end function torchtest.isSetTo() for k,v in ipairs({"real", "half"}) do torchtest_isSetTo(torch.getmetatable(torch.Tensor():type())[v]) end end function torchtest_isSetTo(func) local t1 = func(torch.Tensor(3, 4, 9, 10)) local t2 = func(torch.Tensor(3, 4, 9, 10)) local t3 = func(torch.Tensor()):set(t1) local t4 = t3:reshape(12, 90) mytester:assert(t1:isSetTo(t2) == false, "tensors do not share storage") mytester:assert(t1:isSetTo(t3) == true, "tensor is set to other") mytester:assert(t3:isSetTo(t1) == true, "isSetTo should be symmetric") mytester:assert(t1:isSetTo(t4) == false, "tensors have different view") mytester:assert(not func(torch.Tensor()):isSetTo(func(torch.Tensor())), "Tensors with no storages should not appear to be set " .. 
"to each other") end function torchtest.equal() -- Contiguous, 1D local t1 = torch.Tensor{3, 4, 9, 10} local t2 = t1:clone() local t3 = torch.Tensor{1, 9, 3, 10} local t4 = torch.Tensor{3, 4, 9} local t5 = torch.Tensor() mytester:assert(t1:equal(t2) == true, "wrong answer ") mytester:assert(t1:equal(t3) == false, "wrong answer ") mytester:assert(t1:equal(t4) == false, "wrong answer ") mytester:assert(t1:equal(t5) == false, "wrong answer ") mytester:assert(torch.equal(t1, t2) == true, "wrong answer ") mytester:assert(torch.equal(t1, t3) == false, "wrong answer ") mytester:assert(torch.equal(t1, t4) == false, "wrong answer ") mytester:assert(torch.equal(t1, t5) == false, "wrong answer ") -- Non contiguous, 2D local s = torch.Tensor({{1, 2, 3, 4}, {5, 6, 7, 8}}) local s1 = s[{{}, {2, 3}}] local s2 = s1:clone() local s3 = torch.Tensor({{2, 3}, {6, 7}}) local s4 = torch.Tensor({{0, 0}, {0, 0}}) mytester:assert(not s1:isContiguous(), "wrong answer ") mytester:assert(s1:equal(s2) == true, "wrong answer ") mytester:assert(s1:equal(s3) == true, "wrong answer ") mytester:assert(s1:equal(s4) == false, "wrong answer ") mytester:assert(torch.equal(s1, s2) == true, "wrong answer ") mytester:assert(torch.equal(s1, s3) == true, "wrong answer ") mytester:assert(torch.equal(s1, s4) == false, "wrong answer ") end function torchtest.isSize() for k,v in ipairs({"real", "half"}) do torchtest_isSize(torch.getmetatable(torch.Tensor():type())[v]) end end function torchtest_isSize(func) local t1 = func(torch.Tensor(3, 4, 5)) local s1 = torch.LongStorage({3, 4, 5}) local s2 = torch.LongStorage({5, 4, 3}) mytester:assert(t1:isSize(s1) == true, "wrong answer ") mytester:assert(t1:isSize(s2) == false, "wrong answer ") mytester:assert(t1:isSize(t1:size()) == true, "wrong answer ") end function torchtest.elementSize() local byte = torch.ByteStorage():elementSize() local char = torch.CharStorage():elementSize() local short = torch.ShortStorage():elementSize() local int = 
torch.IntStorage():elementSize() local long = torch.LongStorage():elementSize() local float = torch.FloatStorage():elementSize() local double = torch.DoubleStorage():elementSize() local half = torch.HalfStorage():elementSize() mytester:asserteq(byte, torch.ByteTensor():elementSize()) mytester:asserteq(char, torch.CharTensor():elementSize()) mytester:asserteq(short, torch.ShortTensor():elementSize()) mytester:asserteq(int, torch.IntTensor():elementSize()) mytester:asserteq(long, torch.LongTensor():elementSize()) mytester:asserteq(float, torch.FloatTensor():elementSize()) mytester:asserteq(double, torch.DoubleTensor():elementSize()) mytester:asserteq(half, torch.HalfTensor():elementSize()) mytester:assertne(byte, 0) mytester:assertne(char, 0) mytester:assertne(short, 0) mytester:assertne(int, 0) mytester:assertne(long, 0) mytester:assertne(float, 0) mytester:assertne(double, 0) mytester:assertne(half, 0) -- These tests are portable, not necessarily strict for your system. mytester:asserteq(byte, 1) mytester:asserteq(char, 1) mytester:assert(short >= 2) mytester:assert(int >= 2) mytester:assert(int >= short) mytester:assert(long >= 4) mytester:assert(long >= int) mytester:assert(double >= float) mytester:assert(half <= float) end function torchtest.split() for k,v in ipairs({"real", "half"}) do torchtest_split(torch.getmetatable(torch.Tensor():type())[v]) end end function torchtest_split(func) local result = {} local tensor = func(torch.rand(7,4)) local splitSize = 3 local targetSize = {{3,4},{3,4},{1,4}} local dim = 1 local splits = tensor:split(splitSize, dim) local start = 1 for i, split in ipairs(splits) do mytester:assertTableEq(split:size():totable(), targetSize[i], 'Size error in split '..i) mytester:assertTensorEq(tensor:narrow(dim, start, targetSize[i][dim]), split, 0.00001, 'Content error in split '..i) start = start + targetSize[i][dim] end torch.split(result, tensor, splitSize, dim) local start = 1 for i, split in ipairs(result) do 
mytester:assertTableEq(split:size():totable(), targetSize[i],
                             'Result size error in split '..i)
      mytester:assertTensorEq(tensor:narrow(dim, start, targetSize[i][dim]), split,
                              0.000001, 'Result content error in split '..i)
      start = start + targetSize[i][dim]
   end

   mytester:asserteq(#splits, #result, 'Non-consistent output size from split')
   for i, split in ipairs(splits) do
      mytester:assertTensorEq(split, result[i], 0, 'Non-consistent outputs from split')
   end
end

-- chunk: run torchtest_chunk for both the "real" and "half" conversion paths.
function torchtest.chunk()
   for k,v in ipairs({"real", "half"}) do
      torchtest_chunk(torch.getmetatable(torch.Tensor():type())[v])
   end
end

-- Tensor:chunk must split a 4x7 tensor along dim 2 into chunks of the expected
-- sizes, with contents matching the corresponding narrow().
function torchtest_chunk(func)
   local result = {}
   local tensor = func(torch.rand(4,7))
   local nChunk = 3
   local targetSize = {{4,3},{4,3},{4,1}}
   local dim = 2
   local splits = tensor:chunk(nChunk, dim)
   local start = 1
   for i, split in ipairs(splits) do
      mytester:assertTableEq(split:size():totable(), targetSize[i],
                             'Size error in chunk '..i)
      mytester:assertTensorEq(tensor:narrow(dim, start, targetSize[i][dim]), split,
                              0.00001, 'Content error in chunk '..i)
      start = start + targetSize[i][dim]
   end
   -- NOTE(review): this passes nChunk to torch.split as a *split size*; it only
   -- matches chunk(nChunk) here because ceil(7/3) == 3. Presumably torch.chunk
   -- was intended — confirm before changing (kept as-is to preserve behavior).
   torch.split(result, tensor, nChunk, dim)
   local start = 1
   for i, split in ipairs(result) do
      mytester:assertTableEq(split:size():totable(), targetSize[i],
                             'Result size error in chunk '..i)
      mytester:assertTensorEq(tensor:narrow(dim, start, targetSize[i][dim]), split,
                              0.000001, 'Result content error in chunk '..i)
      start = start + targetSize[i][dim]
   end
end

-- totable: run torchtest_totable for both conversion paths with the matching
-- storage type.
function torchtest.table()
   local convStorage = {
      ['real'] = 'FloatStorage',
      ['half'] = 'HalfStorage'
   }
   -- BUG FIX: this loop previously used ipairs(), which visits only the array
   -- part of a table; convStorage has string keys, so the totable tests were
   -- silently skipped. pairs() actually iterates the map.
   for k,v in pairs(convStorage) do
      torchtest_totable(torch.getmetatable(torch.Tensor():type())[k], v)
   end
end

-- totable must round-trip 0-D, 1-D, 2-D tensors and storages to Lua tables,
-- including non-contiguous tensors.
function torchtest_totable(func, storageType)
   local table0D = {}
   local tensor0D = func(torch.Tensor(table0D))
   mytester:assertTableEq(torch.totable(tensor0D), table0D, 'tensor0D:totable incorrect')

   local table1D = {1, 2, 3}
   local tensor1D = func(torch.Tensor(table1D))
   local storage = torch[storageType](table1D)
   mytester:assertTableEq(tensor1D:totable(), table1D, 'tensor1D:totable incorrect')
   mytester:assertTableEq(storage:totable(), table1D, 'storage:totable incorrect')
   mytester:assertTableEq(torch.totable(tensor1D), table1D, 'torch.totable incorrect for Tensors')
   mytester:assertTableEq(torch.totable(storage), table1D, 'torch.totable incorrect for Storages')

   local table2D = {{1, 2}, {3, 4}}
   local tensor2D = func(torch.Tensor(table2D))
   mytester:assertTableEq(tensor2D:totable(), table2D, 'tensor2D:totable incorrect')

   local tensor3D = func(torch.Tensor({{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}))
   local tensorNonContig = tensor3D:select(2, 2)
   mytester:assert(not tensorNonContig:isContiguous(), 'invalid test')
   mytester:assertTableEq(tensorNonContig:totable(), {{3, 4}, {7, 8}},
                          'totable() incorrect for non-contiguous tensors')
end

-- permute: run torchtest_permute for both conversion paths.
function torchtest.permute()
   for k,v in ipairs({"real", "half"}) do
      torchtest_permute(torch.getmetatable(torch.Tensor():type())[v])
   end
end

-- permute must reorder the sizes per the permutation without mutating the
-- original tensor.
function torchtest_permute(func)
   local orig = {1,2,3,4,5,6,7}
   local perm = torch.randperm(7):totable()
   local x = torch.Tensor(unpack(orig)):fill(0)
   local new = x:permute(unpack(perm)):size():totable()
   mytester:assertTableEq(perm, new, 'Tensor:permute incorrect')
   mytester:assertTableEq(x:size():totable(), orig, 'Tensor:permute changes tensor')
end

-- serialize/deserialize (string and storage forms) must round-trip both plain
-- tables and tensors.
function torchtest.serialize()
   local tableObj = {6, a = 42}
   local tensObj = torch.randn(3,4,5)

   -- Test serializing a table
   local serString = torch.serialize(tableObj)
   local serStorage = torch.serializeToStorage(tableObj)
   mytester:assertTableEq(tableObj, torch.deserialize(serString))
   mytester:assertTableEq(tableObj, torch.deserializeFromStorage(serStorage))

   -- Test serializing a Tensor
   serString = torch.serialize(tensObj)
   serStorage = torch.serializeToStorage(tensObj)
   mytester:assertTensorEq(tensObj, torch.deserialize(serString), 1e-10)
   mytester:assertTensorEq(tensObj, torch.deserializeFromStorage(serStorage), 1e-10)
end

function torchtest.storageview()
   local s1 =
torch.LongStorage({3, 4, 5}) local s2 = torch.LongStorage(s1, 2) mytester:assert(s2:size() == 2, "should be size 2") mytester:assert(s2[1] == s1[2], "should have 4 at position 1") mytester:assert(s2[2] == s1[3], "should have 5 at position 2") s2[1] = 13 mytester:assert(13 == s1[2], "should have 13 at position 1") end function torchtest.nonzero() local nSrc = 12 local types = { 'torch.ByteTensor', 'torch.CharTensor', 'torch.ShortTensor', 'torch.IntTensor', 'torch.FloatTensor', 'torch.DoubleTensor', 'torch.LongTensor', } local shapes = { torch.LongStorage{12}, torch.LongStorage{12, 1}, torch.LongStorage{1, 12}, torch.LongStorage{6, 2}, torch.LongStorage{3, 2, 2}, } for _, type in ipairs(types) do local tensor = torch.rand(nSrc):mul(2):floor():type(type) for _, shape in ipairs(shapes) do tensor = tensor:reshape(shape) local dst1 = torch.nonzero(tensor) local dst2 = tensor:nonzero() -- Does not work. Torch uses the first argument to determine what -- type the Tensor is expected to be. In our case the second argument -- determines the type of Tensor. --local dst3 = torch.LongTensor() --torch.nonzero(dst3, tensor) -- However, there are workarounds to this issue when it is desired to -- use an existing tensor for the result: local dst4 = torch.LongTensor() tensor.nonzero(dst4, tensor) if shape:size() == 1 then local dst = {} for i = 1 , nSrc do if tensor[i] ~= 0 then table.insert(dst, i) end end mytester:assertTensorEq(dst1:select(2, 1), torch.LongTensor(dst), 0.0, "nonzero error") mytester:assertTensorEq(dst2:select(2, 1), torch.LongTensor(dst), 0.0, "nonzero error") --mytester:assertTensorEq(dst3:select(2, 1), torch.LongTensor(dst), -- 0.0, "nonzero error") mytester:assertTensorEq(dst4:select(2, 1), torch.LongTensor(dst), 0.0, "nonzero error") elseif shape:size() == 2 then -- This test will allow through some false positives. It only checks -- that the elements flagged positive are indeed non-zero. 
for i=1,dst1:size()[1] do mytester:assert(tensor[dst1[i][1]][dst1[i][2]] ~= 0) end elseif shape:size() == 3 then -- This test will allow through some false positives. It only checks -- that the elements flagged positive are indeed non-zero. for i=1,dst1:size()[1] do mytester:assert(tensor[dst1[i][1]][dst1[i][2]][dst1[i][3]] ~= 0) end end end end end function torchtest.testheaptracking() local oldheaptracking = torch._heaptracking if oldheaptracking == nil then oldheaptracking = false end torch.setheaptracking(true) mytester:assert(torch._heaptracking == true, 'Heap tracking expected true') torch.setheaptracking(false) mytester:assert(torch._heaptracking == false, 'Heap tracking expected false') -- put heap tracking to its original state torch.setheaptracking(oldheaptracking) end function torchtest.bernoulli() local size = torch.LongStorage{10, 10} local t = torch.ByteTensor(size) local function isBinary(t) return torch.ne(t, 0):cmul(torch.ne(t, 1)):sum() == 0 end local p = 0.5 t:bernoulli(p) mytester:assert(isBinary(t), 'Sample from torch.bernoulli is not binary') local p = torch.rand(size) t:bernoulli(p) mytester:assert(isBinary(t), 'Sample from torch.bernoulli is not binary') end function torchtest.logNormal() local t = torch.FloatTensor(10, 10) local mean, std = torch.uniform(), 0.1 * torch.uniform() local tolerance = 0.02 t:logNormal(mean, std) local logt = t:log() mytester:assertalmosteq(logt:mean(), mean, tolerance, 'mean is wrong') mytester:assertalmosteq(logt:std(), std, tolerance, 'tolerance is wrong') end function torch.test(tests) torch.setheaptracking(true) math.randomseed(os.time()) if torch.getdefaulttensortype() == 'torch.FloatTensor' then precision = 1e-4 elseif torch.getdefaulttensortype() == 'torch.DoubleTensor' then precision = 1e-8 end mytester = torch.Tester() mytester:add(torchtest) mytester:run(tests) return mytester end test/test_Multinomial.lua000066400000000000000000000013741316246254300161040ustar00rootroot00000000000000-- Test 
multinomial for rare events (based on https://github.com/torch/torch7/issues/418) -- and for performance (cf. https://github.com/torch/torch7/issues/453) sys.tic() do local p = torch.FloatTensor(1001000):fill(1) p:narrow(1, 50001, 50000):fill(1e-3) p:div(p:sum()) local N = 1001000 local n = 0 local c = torch.LongTensor(p:nElement()):zero() local c_ptr = c:data() - 1 local tmp = torch.LongTensor() for i = 1, 100 do p.multinomial(tmp, p, N, true); n = n + N tmp:apply(function(i) c_ptr[i] = c_ptr[i] + 1 end) end local actual = c:narrow(1, 50001, 50000):sum() local expected = n*p:narrow(1, 50001, 50000):sum() print('Actual, Expected: ', actual, expected) end print('Time spent: ', sys.toc()) test/test_Tester.lua000066400000000000000000000474261316246254300150700ustar00rootroot00000000000000require 'torch' local tester = torch.Tester() local MESSAGE = "a really useful informative error message" local subtester = torch.Tester() -- The message only interests us in case of failure subtester._success = function(self) return true, MESSAGE end subtester._failure = function(self, message) return false, message end local tests = torch.TestSuite() local test_name_passed_to_setUp local calls_to_setUp = 0 local calls_to_tearDown = 0 local originalIoWrite = io.write local function disableIoWrite() io.write = function() end end local function enableIoWrite() io.write = originalIoWrite end local function meta_assert_success(success, message) tester:assert(success == true, "assert wasn't successful") tester:assert(string.find(message, MESSAGE) ~= nil, "message doesn't match") end local function meta_assert_failure(success, message) tester:assert(success == false, "assert didn't fail") tester:assert(string.find(message, MESSAGE) ~= nil, "message doesn't match") end function tests.really_test_assert() assert((subtester:assert(true, MESSAGE)), "subtester:assert doesn't actually work!") assert(not (subtester:assert(false, MESSAGE)), "subtester:assert doesn't actually work!") end function 
tests.setEarlyAbort() disableIoWrite() for _, earlyAbort in ipairs{false, true} do local myTester = torch.Tester() local invokedCount = 0 local myTests = {} function myTests.t1() invokedCount = invokedCount + 1 myTester:assert(false) end myTests.t2 = myTests.t1 myTester:setEarlyAbort(earlyAbort) myTester:add(myTests) pcall(myTester.run, myTester) tester:assert(invokedCount == (earlyAbort and 1 or 2), "wrong number of tests invoked for use with earlyAbort") end enableIoWrite() end function tests.setRethrowErrors() disableIoWrite() local myTester = torch.Tester() myTester:setRethrowErrors(true) myTester:add(function() error("a throw") end) tester:assertErrorPattern(function() myTester:run() end, "a throw", "error should be rethrown") enableIoWrite() end function tests.disable() disableIoWrite() for disableCount = 1, 2 do local myTester = torch.Tester() local tests = {} local test1Invoked = false local test2Invoked = false function tests.test1() test1Invoked = true end function tests.test2() test2Invoked = true end myTester:add(tests) if disableCount == 1 then myTester:disable('test1'):run() tester:assert((not test1Invoked) and test2Invoked, "disabled test shouldn't have been invoked") else myTester:disable({'test1', 'test2'}):run() tester:assert((not test1Invoked) and (not test2Invoked), "disabled tests shouldn't have been invoked") end end enableIoWrite() end function tests.assert() meta_assert_success(subtester:assert(true, MESSAGE)) meta_assert_failure(subtester:assert(false, MESSAGE)) end local function testEqNe(eqExpected, ...) if eqExpected then meta_assert_success(subtester:eq(...)) meta_assert_failure(subtester:ne(...)) else meta_assert_failure(subtester:eq(...)) meta_assert_success(subtester:ne(...)) end end --[[ Test :assertGeneralEq and :assertGeneralNe (also known as :eq and :ne). Note that in-depth testing of testing of many specific types of data (such as Tensor) is covered below, when we test specific functions (such as :assertTensorEq). 
This just does a general check, as well as testing of testing of mixed datatypes. ]] function tests.assertGeneral() local one = torch.Tensor{1} testEqNe(true, one, one, MESSAGE) testEqNe(false, one, 1, MESSAGE) testEqNe(true, "hi", "hi", MESSAGE) testEqNe(true, {one, 1}, {one, 1}, MESSAGE) testEqNe(true, {{{one}}}, {{{one}}}, MESSAGE) testEqNe(false, {{{one}}}, {{one}}, MESSAGE) testEqNe(true, torch.Storage{1}, torch.Storage{1}, MESSAGE) testEqNe(false, torch.FloatStorage{1}, torch.LongStorage{1}, MESSAGE) testEqNe(false, torch.Storage{1}, torch.Storage{1, 2}, MESSAGE) testEqNe(false, "one", 1, MESSAGE) testEqNe(false, {one}, {one + torch.Tensor{1e-10}}, MESSAGE) testEqNe(true, {one}, {one + torch.Tensor{1e-10}}, 1e-9, MESSAGE) end function tests.assertlt() meta_assert_success(subtester:assertlt(1, 2, MESSAGE)) meta_assert_failure(subtester:assertlt(2, 1, MESSAGE)) meta_assert_failure(subtester:assertlt(1, 1, MESSAGE)) end function tests.assertgt() meta_assert_success(subtester:assertgt(2, 1, MESSAGE)) meta_assert_failure(subtester:assertgt(1, 2, MESSAGE)) meta_assert_failure(subtester:assertgt(1, 1, MESSAGE)) end function tests.assertle() meta_assert_success(subtester:assertle(1, 2, MESSAGE)) meta_assert_failure(subtester:assertle(2, 1, MESSAGE)) meta_assert_success(subtester:assertle(1, 1, MESSAGE)) end function tests.assertge() meta_assert_success(subtester:assertge(2, 1, MESSAGE)) meta_assert_failure(subtester:assertge(1, 2, MESSAGE)) meta_assert_success(subtester:assertge(1, 1, MESSAGE)) end function tests.asserteq() meta_assert_success(subtester:asserteq(1, 1, MESSAGE)) meta_assert_failure(subtester:asserteq(1, 2, MESSAGE)) end function tests.assertalmosteq() meta_assert_success(subtester:assertalmosteq(1, 1, MESSAGE)) meta_assert_success(subtester:assertalmosteq(1, 1 + 1e-17, MESSAGE)) meta_assert_success(subtester:assertalmosteq(1, 2, 2, MESSAGE)) meta_assert_failure(subtester:assertalmosteq(1, 2, MESSAGE)) meta_assert_failure(subtester:assertalmosteq(1, 3, 
1, MESSAGE)) end function tests.assertne() meta_assert_success(subtester:assertne(1, 2, MESSAGE)) meta_assert_failure(subtester:assertne(1, 1, MESSAGE)) end -- The `alsoTestEq` flag is provided to test :eq in addition to :assertTensorEq. -- The behaviour of the two isn't always the same due to handling of tensors of -- different dimensions but the same number of elements. local function testTensorEqNe(eqExpected, alsoTestEq, ...) if eqExpected then meta_assert_success(subtester:assertTensorEq(...)) meta_assert_failure(subtester:assertTensorNe(...)) if alsoTestEq then meta_assert_success(subtester:eq(...)) meta_assert_failure(subtester:ne(...)) end else meta_assert_failure(subtester:assertTensorEq(...)) meta_assert_success(subtester:assertTensorNe(...)) if alsoTestEq then meta_assert_failure(subtester:eq(...)) meta_assert_success(subtester:ne(...)) end end end function tests.assertTensor_types() local allTypes = { torch.ByteTensor, torch.CharTensor, torch.ShortTensor, torch.IntTensor, torch.LongTensor, torch.FloatTensor, torch.DoubleTensor, } for _, tensor1 in ipairs(allTypes) do for _, tensor2 in ipairs(allTypes) do local t1 = tensor1():ones(10) local t2 = tensor2():ones(10) testTensorEqNe(tensor1 == tensor2, true, t1, t2, 1e-6, MESSAGE) end end testTensorEqNe(false, true, torch.FloatTensor(), torch.LongTensor(), MESSAGE) end function tests.assertTensor_sizes() local t = torch.Tensor() -- no dimensions local t2 = torch.ones(2) local t3 = torch.ones(3) local t12 = torch.ones(1, 2) assert(subtester._assertTensorEqIgnoresDims == true) -- default state testTensorEqNe(false, false, t, t2, 1e-6, MESSAGE) testTensorEqNe(false, false, t, t3, 1e-6, MESSAGE) testTensorEqNe(false, false, t, t12, 1e-6, MESSAGE) testTensorEqNe(false, false, t2, t3, 1e-6, MESSAGE) testTensorEqNe(true, false, t2, t12, 1e-6, MESSAGE) testTensorEqNe(false, false, t3, t12, 1e-6, MESSAGE) subtester._assertTensorEqIgnoresDims = false testTensorEqNe(false, true, t, t2, 1e-6, MESSAGE) 
testTensorEqNe(false, true, t, t3, 1e-6, MESSAGE) testTensorEqNe(false, true, t, t12, 1e-6, MESSAGE) testTensorEqNe(false, true, t2, t3, 1e-6, MESSAGE) testTensorEqNe(false, true, t2, t12, 1e-6, MESSAGE) testTensorEqNe(false, true, t3, t12, 1e-6, MESSAGE) subtester._assertTensorEqIgnoresDims = true -- reset back end function tests.assertTensor_epsilon() local t1 = torch.rand(100, 100) local t2 = torch.rand(100, 100) * 1e-5 local t3 = t1 + t2 testTensorEqNe(true, true, t1, t3, 1e-4, MESSAGE) testTensorEqNe(false, true, t1, t3, 1e-6, MESSAGE) end function tests.assertTensor_arg() local one = torch.Tensor{1} tester:assertErrorPattern( function() subtester:assertTensorEq(one, 2) end, "Second argument should be a Tensor") -- Test that assertTensorEq support message and tolerance in either ordering tester:assertNoError( function() subtester:assertTensorEq(one, one, 0.1, MESSAGE) end) tester:assertNoError( function() subtester:assertTensorEq(one, one, MESSAGE, 0.1) end) end function tests.assertTensor() local t1 = torch.randn(100, 100) local t2 = t1:clone() local t3 = torch.randn(100, 100) testTensorEqNe(true, true, t1, t2, 1e-6, MESSAGE) testTensorEqNe(false, true, t1, t3, 1e-6, MESSAGE) testTensorEqNe(true, true, torch.Tensor(), torch.Tensor(), MESSAGE) end -- Check that calling assertTensorEq with two tensors with the same content but -- different dimensions gives a warning. function tests.assertTensorDimWarning() local myTester = torch.Tester() myTester:add( function() myTester:assertTensorEq(torch.Tensor{{1}}, torch.Tensor{1}) end) local warningGiven = false io.write = function(s) if string.match(s, 'but different dimensions') then warningGiven = true end end myTester:run() enableIoWrite() tester:assert(warningGiven, "Calling :assertTensorEq({{1}}, {1}) should give a warning") end local function testTableEqNe(eqExpected, ...) 
if eqExpected then meta_assert_success(subtester:assertTableEq(...)) meta_assert_failure(subtester:assertTableNe(...)) meta_assert_success(subtester:eq(...)) meta_assert_failure(subtester:ne(...)) else meta_assert_failure(subtester:assertTableEq(...)) meta_assert_success(subtester:assertTableNe(...)) meta_assert_failure(subtester:eq(...)) meta_assert_success(subtester:ne(...)) end end function tests.assertTable() testTableEqNe(true, {1, 2, 3}, {1, 2, 3}, MESSAGE) testTableEqNe(false, {1, 2, 3}, {3, 2, 1}, MESSAGE) testTableEqNe(true, {1, 2, {4, 5}}, {1, 2, {4, 5}}, MESSAGE) testTableEqNe(false, {1, 2, 3}, {1,2}, MESSAGE) testTableEqNe(false, {1, 2, 3}, {1, 2, 3, 4}, MESSAGE) testTableEqNe(true, {{1}}, {{1}}, MESSAGE) testTableEqNe(false, {{1}}, {{{1}}}, MESSAGE) testTableEqNe(true, {false}, {false}, MESSAGE) testTableEqNe(false, {true}, {false}, MESSAGE) testTableEqNe(false, {false}, {true}, MESSAGE) local tensor = torch.rand(100, 100) local t1 = {1, "a", key = "value", tensor = tensor, subtable = {"nested"}} local t2 = {1, "a", key = "value", tensor = tensor, subtable = {"nested"}} testTableEqNe(true, t1, t2, MESSAGE) for k, v in pairs(t1) do local x = "something else" t2[k] = nil t2[x] = v testTableEqNe(false, t1, t2, MESSAGE) t2[x] = nil t2[k] = x testTableEqNe(false, t1, t2, MESSAGE) t2[k] = v testTableEqNe(true, t1, t2, MESSAGE) end end local function good_fn() end local function bad_fn() error("muahaha!") end function tests.assertError() meta_assert_success(subtester:assertError(bad_fn, MESSAGE)) meta_assert_failure(subtester:assertError(good_fn, MESSAGE)) end function tests.assertNoError() meta_assert_success(subtester:assertNoError(good_fn, MESSAGE)) meta_assert_failure(subtester:assertNoError(bad_fn, MESSAGE)) end function tests.assertErrorPattern() meta_assert_success(subtester:assertErrorPattern(bad_fn, "haha", MESSAGE)) meta_assert_failure(subtester:assertErrorPattern(bad_fn, "hehe", MESSAGE)) end function tests.testSuite_duplicateTests() local function 
createDuplicateTests() local tests = torch.TestSuite() function tests.testThis() end function tests.testThis() end end tester:assertErrorPattern(createDuplicateTests, "Test testThis is already defined.") end --[[ Returns a Tester with `numSuccess` success cases, `numFailure` failure cases, and with an error if `hasError` is true. Success and fail tests are evaluated with tester:eq ]] local function genDummyTest(numSuccess, numFailure, hasError) hasError = hasError or false local dummyTester = torch.Tester() local dummyTests = torch.TestSuite() if numSuccess > 0 then function dummyTests.testDummySuccess() for i = 1, numSuccess do dummyTester:eq({1}, {1}, '', 0) end end end if numFailure > 0 then function dummyTests.testDummyFailure() for i = 1, numFailure do dummyTester:eq({1}, {2}, '', 0) end end end if hasError then function dummyTests.testDummyError() error('dummy error') end end return dummyTester:add(dummyTests) end function tests.runStatusAndAssertCounts() local emptyTest = genDummyTest(0, 0, false) local sucTest = genDummyTest(1, 0, false) local multSucTest = genDummyTest(4, 0, false) local failTest = genDummyTest(0, 1, false) local errTest = genDummyTest(0, 0, true) local errFailTest = genDummyTest(0, 1, true) local errSucTest = genDummyTest(1, 0, true) local failSucTest = genDummyTest(1, 1, false) local failSucErrTest = genDummyTest(1, 1, true) disableIoWrite() local success, msg = pcall(emptyTest.run, emptyTest) tester:asserteq(success, true, "pcall should succeed for empty tests") local success, msg = pcall(sucTest.run, sucTest) tester:asserteq(success, true, "pcall should succeed for 1 successful test") local success, msg = pcall(multSucTest.run, multSucTest) tester:asserteq(success, true, "pcall should succeed for 2+ successful tests") local success, msg = pcall(failTest.run, failTest) tester:asserteq(success, false, "pcall should fail for tests with failure") local success, msg = pcall(errTest.run, errTest) tester:asserteq(success, false, "pcall should 
fail for tests with error") local success, msg = pcall(errFailTest.run, errFailTest) tester:asserteq(success, false, "pcall should fail for error+fail tests") local success, msg = pcall(errSucTest.run, errSucTest) tester:asserteq(success, false, "pcall should fail for error+success tests") local success, msg = pcall(failSucTest.run, failSucTest) tester:asserteq(success, false, "pcall should fail for fail+success tests") local success, msg = pcall(failSucErrTest.run, failSucErrTest) tester:asserteq(success, false, "pcall should fail for fail+success+err test") enableIoWrite() tester:asserteq(emptyTest.countasserts, 0, "emptyTest should have 0 asserts") tester:asserteq(sucTest.countasserts, 1, "sucTest should have 1 assert") tester:asserteq(multSucTest.countasserts, 4, "multSucTest should have 4 asserts") tester:asserteq(failTest.countasserts, 1, "failTest should have 1 assert") tester:asserteq(errTest.countasserts, 0, "errTest should have 0 asserts") tester:asserteq(errFailTest.countasserts, 1, "errFailTest should have 1 assert") tester:asserteq(errSucTest.countasserts, 1, "errSucTest should have 0 asserts") tester:asserteq(failSucTest.countasserts, 2, "failSucTest should have 2 asserts") end function tests.checkNestedTestsForbidden() disableIoWrite() local myTester = torch.Tester() local myTests = {{function() end}} tester:assertErrorPattern(function() myTester:add(myTests) end, "Nested sets", "tester should forbid adding nested test sets") enableIoWrite() end function tests.checkWarningOnAssertObject() -- This test checks that calling assert with an object generates a warning local myTester = torch.Tester() local myTests = {} function myTests.assertAbuse() myTester:assert({}) end myTester:add(myTests) local warningGiven = false io.write = function(s) if string.match(s, 'should only be used for boolean') then warningGiven = true end end myTester:run() enableIoWrite() tester:assert(warningGiven, "Should warn on calling :assert(object)") end function 
tests.checkWarningOnAssertNeObject() -- This test checks that calling assertne with two objects generates warning local myTester = torch.Tester() local myTests = {} function myTests.assertAbuse() myTester:assertne({}, {}) end myTester:add(myTests) local warningGiven = false io.write = function(s) if string.match(s, 'assertne should only be used to compare basic') then warningGiven = true end end myTester:run() enableIoWrite() tester:assert(warningGiven, "Should warn on calling :assertne(obj, obj)") end function tests.checkWarningOnExtraAssertArguments() -- This test checks that calling assert with extra args gives a lua error local myTester = torch.Tester() local myTests = {} function myTests.assertAbuse() myTester:assert(true, "some message", "extra argument") end myTester:add(myTests) local errorGiven = false io.write = function(s) if string.match(s, 'Unexpected arguments') then errorGiven = true end end tester:assertError(function() myTester:run() end) enableIoWrite() tester:assert(errorGiven, ":assert should fail on extra arguments") end function tests.checkWarningOnUsingTable() -- Checks that if we don't use a TestSuite then gives a warning local myTester = torch.Tester() local myTests = {} myTester:add(myTests) local errorGiven = false io.write = function(s) if string.match(s, 'use TestSuite rather than plain lua table') then errorGiven = true end end myTester:run() enableIoWrite() tester:assert(errorGiven, "Using a plain lua table for testsuite should warn") end function tests.checkMaxAllowedSetUpAndTearDown() -- Checks can have at most 1 set-up and at most 1 tear-down function local function f() end local myTester = torch.Tester() for _, name in ipairs({'_setUp', '_tearDown'}) do tester:assertNoError(function() myTester:add(f, name) end, "Adding 1 set-up / tear-down should be fine") tester:assertErrorPattern(function() myTester:add(f, name) end, "Only one", "Adding second set-up / tear-down should fail") end end function tests.test_setUp() 
tester:asserteq(test_name_passed_to_setUp, 'test_setUp') for key, value in pairs(tester.tests) do tester:assertne(key, '_setUp') end end function tests.test_tearDown() for key, value in pairs(tester.tests) do tester:assertne(key, '_tearDown') end end function tests._setUp(name) test_name_passed_to_setUp = name calls_to_setUp = calls_to_setUp + 1 end function tests._tearDown(name) calls_to_tearDown = calls_to_tearDown + 1 end tester:add(tests):run() -- Additional tests to check that _setUp and _tearDown were called. local test_count = 0 for _ in pairs(tester.tests) do test_count = test_count + 1 end local postTests = torch.TestSuite() local postTester = torch.Tester() function postTests.test_setUp(tester) postTester:asserteq(calls_to_setUp, test_count, "Expected " .. test_count .. " calls to _setUp") end function postTests.test_tearDown() postTester:asserteq(calls_to_tearDown, test_count, "Expected " .. test_count .. " calls to _tearDown") end postTester:add(postTests):run() test/test_aliasMultinomial.lua000066400000000000000000000034331316246254300171140ustar00rootroot00000000000000local tester = torch.Tester() local function aliasMultinomial() local n_class = 10000 local probs = torch.Tensor(n_class):uniform(0,1) probs:div(probs:sum()) local a = torch.Timer() local state = torch.multinomialAliasSetup(probs) print("AliasMultinomial setup in "..a:time().real.." seconds(hot)") a:reset() state = torch.multinomialAliasSetup(probs, state) print("AliasMultinomial setup in "..a:time().real.." seconds(cold)") a:reset() tester:assert(state[1]:min() >= 0, "Index ="..state[1]:min().."alias indices has an index below or equal to 0") tester:assert(state[1]:max() <= n_class, state[1]:max().." alias indices has an index exceeding num_class") local output = torch.LongTensor(1000000) torch.multinomialAlias(output, state) local n_samples = output:nElement() print("AliasMultinomial draw "..n_samples.." elements from "..n_class.." classes ".."in "..a:time().real.." 
seconds") local counts = torch.Tensor(n_class):zero() mult_output = torch.multinomial(probs, n_samples, true) print("Multinomial draw "..n_samples.." elements from "..n_class.." classes ".." in "..a:time().real.." seconds") tester:assert(output:min() > 0, "sampled indices has an index below or equal to 0") tester:assert(output:max() <= n_class, "indices has an index exceeding num_class") output:apply(function(x) counts[x] = counts[x] + 1 end) a:reset() counts:div(counts:sum()) tester:assert(state[1]:min() >= 0, "Index ="..state[1]:min().."alias indices has an index below or equal to 0") tester:assert(state[1]:max() <= n_class, state[1]:max().." alias indices has an index exceeding num_class") tester:eq(probs, counts, 0.001, "probs and counts should be approximately equal") end tester:add(aliasMultinomial) tester:run() test/test_half.lua000066400000000000000000000034611316246254300145230ustar00rootroot00000000000000local mytester local torchtest = torch.TestSuite() -- Lua 5.2 compatibility local loadstring = loadstring or load local unpack = unpack or table.unpack function torchtest.easy() local x=torch.randn(5, 6):half() mytester:assert(x:isContiguous(), 'x should be contiguous') mytester:assert(x:dim() == 2, 'x should have dim of 2') mytester:assert(x:nDimension() == 2, 'x should have nDimension of 2') mytester:assert(x:nElement() == 5 * 6, 'x should have 30 elements') local stride = x:stride() local expectedStride = torch.LongStorage{6,1} for i=1,stride:size() do mytester:assert(stride[i] == expectedStride[i], "stride is wrong") end x=x:t() mytester:assert(not x:isContiguous(), 'x transpose should not be contiguous') x=x:transpose(1,2) mytester:assert(x:isContiguous(), 'x should be contiguous after 2 transposes') local y=torch.HalfTensor() y:resizeAs(x:t()):copy(x:t()) mytester:assert(x:isContiguous(), 'after resize and copy, x should be contiguous') mytester:assertTensorEq(y, x:t(), 0.001, 'copy broken after resizeAs') local z=torch.HalfTensor() z:resize(6, 
5):copy(x:t()) mytester:assertTensorEq(y, x:t(), 0.001, 'copy broken after resize') end function torchtest.narrowSub() local x = torch.randn(5, 6):half() local narrow = x:narrow(1, 2, 3) local sub = x:sub(2, 4) mytester:assertTensorEq(narrow, sub, 0.001, 'narrow not equal to sub') end function torchtest.selectClone() local x = torch.zeros(5, 6) x:select(1,2):fill(2) x=x:half() local y=x:clone() mytester:assertTensorEq(x, y, 0.001, 'not equal after select and clone') x:select(1,1):fill(3) mytester:assert(y[1][1] == 0, 'clone broken') end torch.setheaptracking(true) math.randomseed(os.time()) mytester = torch.Tester() mytester:add(torchtest) mytester:run(tests) test/test_qr.lua000066400000000000000000000234151316246254300142340ustar00rootroot00000000000000-- This file contains tests for the QR decomposition functions in torch: -- torch.qr(), torch.geqrf() and torch.orgqr(). local torch = require 'torch' local tester = torch.Tester() local tests = torch.TestSuite() -- torch.qr() with result tensors given. local function qrInPlace(tensorFunc) return function(x) local q, r = tensorFunc(), tensorFunc() torch.qr(q, r, x:clone()) return q, r end end -- torch.qr() without result tensors given. local function qrReturned(tensorFunc) return function(x) return torch.qr(x:clone()) end end -- torch.geqrf() with result tensors given. local function geqrfInPlace(tensorFunc) return function(x) local result = tensorFunc() local tau = tensorFunc() local result_, tau_ = torch.geqrf(result, tau, x) assert(torch.pointer(result) == torch.pointer(result_), 'expected result, result_ same tensor') assert(torch.pointer(tau) == torch.pointer(tau_), 'expected tau, tau_ same tensor') return result_, tau_ end end -- torch.orgqr() with result tensors given. 
local function orgqrInPlace(tensorFunc) return function(result, tau) local q = tensorFunc() local q_ = torch.orgqr(q, result, tau) assert(torch.pointer(q) == torch.pointer(q_), 'expected q, q_ same tensor') return q end end -- Test a custom QR routine that calls the LAPACK functions manually. local function qrManual(geqrfFunc, orgqrFunc) return function(x) local m = x:size(1) local n = x:size(2) local k = math.min(m, n) local result, tau = geqrfFunc(x) assert(result:size(1) == m) assert(result:size(2) == n) assert(tau:size(1) == k) local r = torch.triu(result:narrow(1, 1, k)) local q = orgqrFunc(result, tau) return q:narrow(2, 1, k), r end end -- Check that Q multiplied with a matrix with ormqr gives the correct result local function checkQM(testOpts, mat1, mat2) local q, r = torch.qr(mat1) local m, tau = torch.geqrf(mat1) local requiredPrecision = 1e-5 tester:assertTensorEq(torch.mm(q, mat2), torch.ormqr(m, tau, mat2), requiredPrecision) tester:assertTensorEq(torch.mm(mat2, q), torch.ormqr(m, tau, mat2, 'R'), requiredPrecision) tester:assertTensorEq(torch.mm(q:t(), mat2), torch.ormqr(m, tau, mat2, 'L', 'T'), requiredPrecision) tester:assertTensorEq(torch.mm(mat2, q:t()), torch.ormqr(m, tau, mat2, 'R', 'T'), requiredPrecision) end -- Check that the given `q`, `r` matrices are a valid QR decomposition of `a`. 
local function checkQR(testOpts, a, q, r) local qrFunc = testOpts.qr if not q then q, r = qrFunc(a) end local k = math.min(a:size(1), a:size(2)) tester:asserteq(q:size(1), a:size(1), "Bad size for q first dimension.") tester:asserteq(q:size(2), k, "Bad size for q second dimension.") tester:asserteq(r:size(1), k, "Bad size for r first dimension.") tester:asserteq(r:size(2), a:size(2), "Bad size for r second dimension.") tester:assertTensorEq(q:t() * q, torch.eye(q:size(2)):typeAs(testOpts.tensorFunc()), testOpts.precision, "Q was not orthogonal") tester:assertTensorEq(r, r:triu(), testOpts.precision, "R was not upper triangular") tester:assertTensorEq(q * r, a, testOpts.precision, "QR = A") end -- Do a QR decomposition of `a` and check that the result is valid and matches -- the given expected `q` and `r`. local function checkQRWithExpected(testOpts, a, expected_q, expected_r) local qrFunc = testOpts.qr -- Since the QR decomposition is unique only up to the signs of the rows of -- R, we must ensure these are positive before doing the comparison. local function canonicalize(q, r) local d = r:diag():sign():diag() return q * d, d * r end local q, r = qrFunc(a) local q_canon, r_canon = canonicalize(q, r) local expected_q_canon, expected_r_canon = canonicalize(expected_q, expected_r) tester:assertTensorEq(q_canon, expected_q_canon, testOpts.precision, "Q did not match expected") tester:assertTensorEq(r_canon, expected_r_canon, testOpts.precision, "R did not match expected") checkQR(testOpts, a, q, r) end -- Generate a separate test based on `func` for each of the possible -- combinations of tensor type (double or float) and QR function (torch.qr -- in-place, torch.qr, and manually calling the geqrf and orgqr from Lua -- (both in-place and not). -- -- The tests are added to the given `tests` table, with names generated by -- appending a unique string for the specific combination to `name`. 
-- -- If opts.doubleTensorOnly is true, then the FloatTensor versions of the test -- will be skipped. local function addTestVariations(tests, name, func, opts) opts = opts or {} local tensorTypes = { [torch.DoubleTensor] = 1e-12, [torch.FloatTensor] = 1e-5, } for tensorFunc, requiredPrecision in pairs(tensorTypes) do local qrFuncs = { ['inPlace'] = qrInPlace(tensorFunc), ['returned'] = qrReturned(tensorFunc), ['manualInPlace'] = qrManual(geqrfInPlace(tensorFunc), orgqrInPlace(tensorFunc)), ['manualReturned'] = qrManual(torch.geqrf, torch.orgqr) } for qrName, qrFunc in pairs(qrFuncs) do local testOpts = { tensorFunc=tensorFunc, precision=requiredPrecision, qr=qrFunc, } local tensorType = tensorFunc():type() local fullName = name .. "_" .. qrName .. "_" .. tensorType assert(not tests[fullName]) if tensorType == 'torch.DoubleTensor' or not opts.doubleTensorOnly then tests[fullName] = function() local state = torch.getRNGState() torch.manualSeed(1) func(testOpts) torch.setRNGState(state) end end end end end -- Decomposing a specific square matrix. addTestVariations(tests, 'qrSquare', function(testOpts) return function(testOpts) local tensorFunc = testOpts.tensorFunc local a = tensorFunc{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}} local expected_q = tensorFunc{ {-1.230914909793328e-01, 9.045340337332914e-01, 4.082482904638621e-01}, {-4.923659639173310e-01, 3.015113445777629e-01, -8.164965809277264e-01}, {-8.616404368553292e-01, -3.015113445777631e-01, 4.082482904638634e-01}, } local expected_r = tensorFunc{ {-8.124038404635959e+00, -9.601136296387955e+00, -1.107823418813995e+01}, { 0.000000000000000e+00, 9.045340337332926e-01, 1.809068067466585e+00}, { 0.000000000000000e+00, 0.000000000000000e+00, -8.881784197001252e-16}, } checkQRWithExpected(testOpts, a, expected_q, expected_r) end end, {doubleTensorOnly=true}) -- Decomposing a specific (wide) rectangular matrix. addTestVariations(tests, 'qrRectFat', function(testOpts) -- The matrix is chosen to be full-rank. 
local a = testOpts.tensorFunc{ {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 13} } local expected_q = testOpts.tensorFunc{ {-0.0966736489045663, 0.907737593658436 , 0.4082482904638653}, {-0.4833682445228317, 0.3157348151855452, -0.8164965809277254}, {-0.870062840141097 , -0.2762679632873518, 0.4082482904638621} } local expected_r = testOpts.tensorFunc{ { -1.0344080432788603e+01, -1.1794185166357092e+01, -1.3244289899925587e+01, -1.5564457473635180e+01}, { 0.0000000000000000e+00, 9.4720444555662542e-01, 1.8944088911132546e+00, 2.5653453733825331e+00}, { 0.0000000000000000e+00, 0.0000000000000000e+00, 1.5543122344752192e-15, 4.0824829046386757e-01} } checkQRWithExpected(testOpts, a, expected_q, expected_r) end, {doubleTensorOnly=true}) -- Decomposing a specific (thin) rectangular matrix. addTestVariations(tests, 'qrRectThin', function(testOpts) -- The matrix is chosen to be full-rank. local a = testOpts.tensorFunc{ { 1, 2, 3}, { 4, 5, 6}, { 7, 8, 9}, {10, 11, 13}, } local expected_q = testOpts.tensorFunc{ {-0.0776150525706334, -0.833052161400748 , 0.3651483716701106}, {-0.3104602102825332, -0.4512365874254053, -0.1825741858350556}, {-0.5433053679944331, -0.0694210134500621, -0.7302967433402217}, {-0.7761505257063329, 0.3123945605252804, 0.5477225575051663} } local expected_r = testOpts.tensorFunc{ {-12.8840987267251261, -14.5916298832790581, -17.0753115655393231}, { 0, -1.0413152017509357, -1.770235842976589 }, { 0, 0, 0.5477225575051664} } checkQRWithExpected(testOpts, a, expected_q, expected_r) end, {doubleTensorOnly=true}) -- Decomposing a sequence of medium-sized random matrices. addTestVariations(tests, 'randomMediumQR', function(testOpts) for x = 0, 10 do for y = 0, 10 do local m = math.pow(2, x) local n = math.pow(2, y) local x = torch.rand(m, n) checkQR(testOpts, x:typeAs(testOpts.tensorFunc())) end end end) -- Decomposing a sequence of small random matrices. 
addTestVariations(tests, 'randomSmallQR', function(testOpts) for m = 1, 40 do for n = 1, 40 do checkQR(testOpts, torch.rand(m, n):typeAs(testOpts.tensorFunc())) end end end) -- Decomposing a sequence of small matrices that are not contiguous in memory. addTestVariations(tests, 'randomNonContiguous', function(testOpts) for m = 2, 40 do for n = 2, 40 do local x = torch.rand(m, n):t() tester:assert(not x:isContiguous(), "x should not be contiguous") checkQR(testOpts, x:typeAs(testOpts.tensorFunc())) end end end) function tests.testQM() checkQM({}, torch.randn(10, 10), torch.randn(10, 10)) -- checkQM({}, torch.randn(20, 10), torch.randn(20, 20)) end tester:add(tests) tester:run() test/test_sharedmem.lua000066400000000000000000000057261316246254300155640ustar00rootroot00000000000000require 'torch' local ffi = require 'ffi' local tester = torch.Tester() local tests = torch.TestSuite() local function createSharedMemStorage(name, size, storageType) local storageType = storageType or 'FloatStorage' local shmName = name or os.tmpname():gsub('/','_') local isShared = true local isSharedMem = true local nElements = size or torch.random(10000, 20000) local storage = torch[storageType](shmName, isShared, nElements, isSharedMem) return storage, shmName end local function shmFilePath(shmName) return (ffi.os ~= 'Windows' and '/dev/shm/' or '') .. 
shmName end local function removeShmFile(shmFileName) if ffi.os == 'Windows' then os.remove(shmFileName) end end function tests.createSharedMemFile() local storage, shmName = createSharedMemStorage() local shmFileName = shmFilePath(shmName) -- check that file is at /dev/shm tester:assert(paths.filep(shmFileName), 'Shared memory file exists') -- collect storage and make sure that file is gone storage = nil collectgarbage() collectgarbage() removeShmFile(shmFileName) tester:assert(not paths.filep(shmFileName), 'Shared memory file does not exists') end function tests.checkContents() local storage, shmName = createSharedMemStorage() local shmFileName = shmFilePath(shmName) local tensor = torch.FloatTensor(storage, 1, torch.LongStorage{storage:size()}) tensor:copy(torch.rand(storage:size())) local sharedFile = torch.DiskFile(shmFileName, 'r'):binary() for i = 1, storage:size() do tester:assert(sharedFile:readFloat() == storage[i], 'value is not correct') end sharedFile:close() removeShmFile(shmFileName) end function tests.testSharing() -- since we are going to cast numbers into double (lua default) -- we specifically generate double storage local storage, shmName = createSharedMemStorage(nil, nil, 'DoubleStorage') local shmFileName = shmFilePath(shmName) local tensor = torch.DoubleTensor(storage, 1, torch.LongStorage{storage:size()}) tensor:copy(torch.rand(storage:size())) local tensorCopy = tensor.new():resizeAs(tensor):copy(tensor) -- access the same shared memory file as regular mapping from same process local storage2 = torch.DoubleStorage(shmFileName, true, storage:size()) local tensor2 = torch.DoubleTensor(storage2, 1, torch.LongStorage{storage2:size()}) local tensor2Copy = tensor2.new():resizeAs(tensor2):copy(tensor2) tester:assertTensorEq(tensorCopy, tensor2Copy, 0, 'contents don\'t match') -- fill tensor 1 with a random value and read from 2 local rval = torch.uniform() tensor:fill(rval) for i = 1, tensor2:size(1) do tester:asserteq(tensor2[i], rval, 'content 
is wrong') end -- fill tensor 2 with a random value and read from 1 local rval = torch.uniform() tensor2:fill(rval) for i = 1, tensor:size(1) do tester:asserteq(tensor[i], rval, 'content is wrong') end removeShmFile(shmFileName) end tester:add(tests) tester:run() test/test_timer.lua000066400000000000000000000031221316246254300147230ustar00rootroot00000000000000require 'torch' local ffi = require 'ffi' local tester = torch.Tester() local tests = torch.TestSuite() function tests.timerTime() local timer = torch.Timer() local function wait(seconds) if ffi.os == 'Windows' then os.execute(string.format('ping 127.0.0.1 -n %d > nul', seconds + 1)) else os.execute(string.format('sleep %d > nul', seconds)) end end timer:reset() wait(1) local passed_time = timer:time().real tester:assert(passed_time < 1.1, ("Too long time passed: %.1f sec >= 1.1 sec"):format(passed_time)) tester:assert(passed_time > 0.9, ("Too short time passed: %.1f sec <= 0.9 sec"):format(passed_time)) timer:stop() wait(1) passed_time = timer:time().real tester:assert(passed_time < 1.1, ("Too long time passed: %.1f sec >= 1.1 sec"):format(passed_time)) tester:assert(passed_time > 0.9, ("Too short time passed: %.1f sec <= 0.9 sec"):format(passed_time)) timer:resume() wait(1) passed_time = timer:time().real tester:assert(passed_time < 2.2, ("Too long time passed: %.1f sec >= 2.2 sec"):format(passed_time)) tester:assert(passed_time > 1.8, ("Too short time passed: %.1f sec <= 1.8 sec"):format(passed_time)) timer:reset() wait(1) passed_time = timer:time().real tester:assert(passed_time < 1.1, ("Too long time passed: %.1f sec >= 1.1 sec"):format(passed_time)) tester:assert(passed_time > 0.9, ("Too short time passed: %.1f sec <= 0.9 sec"):format(passed_time)) end tester:add(tests) tester:run() test/test_writeObject.lua000066400000000000000000000150341316246254300160710ustar00rootroot00000000000000require 'torch' local myTester = torch.Tester() local tests = torch.TestSuite() function torch.HalfTensor:norm() return 
self:real():norm() end -- checks that an object can be written and unwritten -- returns false if an error occurs local function serializeAndDeserialize(obj) local file = torch.MemoryFile() file:binary() local ok, msg = pcall (file.writeObject, file, obj) myTester:assert(ok, 'error in writing an object' ) file:seek(1) local ok, copy = pcall(file.readObject, file) if not ok then print(copy) end myTester:assert(ok, 'error in reading an object ') return copy end function tests.test_can_write_a_nil_closure() local a local function closure() if not a then return 1 end return 0 end local copyClosure = serializeAndDeserialize(closure) myTester:assert(copyClosure() == closure(), 'the closures should give same output') end function tests.test_nil_upvalues_in_closure() local a = 1 local b local c = 2 local function closure() if not b then return c end return a end local copyClosure = serializeAndDeserialize(closure) myTester:assert(copyClosure() == closure(), 'the closures should give same output') end function tests.test_global_function_in_closure() local x = "5" local function closure(str) return tonumber(str .. x) end local copyClosure = serializeAndDeserialize(closure) myTester:assert(copyClosure("3") == closure("3"), 'the closures should give same output') end function tests.test_a_recursive_closure() local foo foo = function (level) if level == 1 then return 1 end return 1+foo(level-1) end local copyFoo = serializeAndDeserialize(foo) myTester:assert(copyFoo(42) == foo(42), 'the closures should give same output') end function tests.test_a_tensor() for k,v in ipairs({"real", "half"}) do tests_test_a_tensor(torch.getmetatable(torch.Tensor():type())[v]) end end function tests_test_a_tensor(func) local x = func(torch.rand(5, 10)) local xcopy = serializeAndDeserialize(x) myTester:assert(x:norm() == xcopy:norm(), 'tensors should be the same') end -- Regression test for bug reported in issue 456. 
function tests.test_empty_table() local file = torch.MemoryFile() file:writeObject({}) end function tests.test_error_msg() local torch = torch local inner = { baz = function(a) torch.somefunc() end } local outer = { theinner = inner } local function evil_func() outer.prop = 1 image.compress(1) end local ok, msg = pcall(torch.save, 'saved.t7', evil_func) myTester:assert(not ok) myTester:assert(msg:find('at <%?>%.outer%.theinner%.baz%.torch') ~= nil) end function tests.test_warning_msg() local foo = {} torch.class('Bar', foo) local obj = foo.Bar() local tensor = torch.Tensor() obj.data = tensor:cdata() -- pick something NOT writable local file = torch.MemoryFile('rw'):binary() local ok, _ = pcall(torch.File.writeObject, file, obj) -- only a warning is printed on STDOUT: -- $ Warning: cannot write object field of myTester:assert(ok) file:close() end function tests.test_referenced() local file = torch.MemoryFile('rw'):binary() file:referenced(false) local foo = 'bar' file:writeObject(foo) file:close() end function tests.test_shared_upvalues() if debug.upvalueid then local i=1 local j=2 local func = {} func.increment = function() i=i+1 j=j+2 end func.get_i = function() return i end func.get_j = function() return j end local copyFunc = serializeAndDeserialize(func) myTester:assert(copyFunc.get_i()==1) myTester:assert(copyFunc.get_j()==2) copyFunc.increment() myTester:assert(copyFunc.get_i()==2) myTester:assert(copyFunc.get_j()==4) else print('Not running shared upvalues test, as we are in Lua-5.1') end end -- checks that the hook function works properly -- returns false if an error occurs function tests.test_SerializationHook() -- Simpel uuid implementation from [https://gist.github.com/jrus/3197011] -- The only goal is to aoid collisions within the scope of tests, -- so more than enough. 
local random = math.random local function uuid() local template ='xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx' return string.gsub(template, '[xy]', function (c) local v = (c == 'x') and random(0, 0xf) or random(8, 0xb) return string.format('%x', v) end) end local unique1 = uuid() local unique2 = uuid() local class = {} -- Create 2 classes local spec = torch.class('class.'.. unique1, class) function spec:test() return false end local gen = torch.class('class.' .. unique2, class) function gen:test() return true end local hook = function(object) local class = class local newObject = object if torch.typename(object) == 'class.'..unique1 then newObject = class[unique2]() end return newObject end -- Write to 2 files, first without hooking, -- second with hooking local file = torch.MemoryFile('rw') file:binary() local file2 = torch.MemoryFile('rw') file2:binary() local s = class[unique1]() local object = {s1 = s, v = 'test', g = class[unique2](), s2 = s} file:writeObject(object) file2:writeObject(object, nil, hook) -- unregister class[unique1] and try to reload the first serialized object if debug and debug.getregistry then local ok, res = pcall(function() classTestSerializationHook1 = nil debug.getregistry()[classTestSerializationHook1] = nil file:seek(1) return file:readObject() end) myTester:assert(not ok) else print('Not running serialization hook failure test because debug is missing.') end -- Try to reload the second serialized object local ok, clone = pcall(function() file2:seek(1) return file2:readObject() end) -- Test that everything happened smoothly myTester:assert(clone.v == 'test') myTester:assert(torch.typename(clone.s1) == 'class.' .. 
unique2) myTester:assert(clone.s1:test() and clone.s2:test()) myTester:assert(string.format('%x',torch.pointer(clone.s1)) == string.format('%x',torch.pointer(clone.s2))) end function tests.test_serializeToStorage() torch.save("foo.t7", "foo") local f = io.open("foo.t7", "rb") local size = f:seek("end") f:close() myTester:eq( torch.serializeToStorage("foo"):size(), size, "memory and disk serializations should have the same size" ) end myTester:add(tests) myTester:run() if myTester.errors[1] then os.exit(1) end test/timeSort.lua000066400000000000000000000120651316246254300143600ustar00rootroot00000000000000-- gnuplot.figure(2) -- Test torch sort, show it suffers from the problems of quicksort -- i.e. complexity O(N^2) in worst-case of sorted list require 'gnuplot' local ffi = require 'ffi' local cmd = torch.CmdLine() cmd:option('-N', 10^7, 'Maximum array size') cmd:option('-p', 50, 'Number of points in logspace') cmd:option('-r', 20, 'Number of repetitions') local options = cmd:parse(arg or {}) function main() local log10 = math.log10 or function(x) return math.log(x, 10) end local pow10 = torch.linspace(1,log10(options.N), options.p) local num_sizes = options.p local num_reps = options.r local old_rnd = torch.zeros(num_sizes, num_reps) local old_srt = torch.zeros(num_sizes, num_reps) local old_cst = torch.zeros(num_sizes, num_reps) local new_rnd = torch.zeros(num_sizes, num_reps) local new_srt = torch.zeros(num_sizes, num_reps) local new_cst = torch.zeros(num_sizes, num_reps) local ratio_rnd = torch.zeros(num_sizes, num_reps) local ratio_srt = torch.zeros(num_sizes, num_reps) local ratio_cst = torch.zeros(num_sizes, num_reps) -- Ascending sort uses new sort local function time_sort(x) collectgarbage() local start = os.clock() torch.sort(x,false) return (os.clock()-start) end -- Descending sort uses old sort local function time_old_sort(x) collectgarbage() local start = os.clock() torch.sort(x,true) return (os.clock()-start) end local benches = { function(i,j,n) -- 
on random local input = torch.rand(n) new_rnd[i][j] = time_sort(input:clone()) old_rnd[i][j] = time_old_sort(input:clone()) end, function(i,j,n) -- on sorted new_srt[i][j] = time_sort(torch.linspace(0,1,n)) old_srt[i][j] = time_old_sort(torch.linspace(0,1,n):add(-1):mul(-1)) -- old_time is called on descending sort, hence the reversed input end, function(i,j,n) -- on constant new_cst[i][j] = time_sort(torch.zeros(n)) old_cst[i][j] = time_old_sort(torch.zeros(n)) end } local num_benches = #benches local num_exps = num_sizes * num_benches * num_reps -- Full randomization local perm = torch.randperm(num_exps):long() local perm_benches = torch.Tensor(num_exps) local perm_reps = torch.Tensor(num_exps) local perm_sizes = torch.Tensor(num_exps) local l = 1 for i=1, num_sizes do for j=1, num_reps do for k=1, num_benches do perm_benches[ perm[l] ] = k perm_reps[ perm[l] ] = j perm_sizes[ perm[l] ] = i l = l+1 end end end local pc = 0 for j = 1, num_exps do local n = 10^pow10[perm_sizes[j]] -- print(string.format('rep %d / %d, bench %d, size %d, rep %d\n', j, num_exps, perm_benches[j], n, perm_reps[j])) if math.floor(100*j/num_exps) > pc then pc = math.floor(100*j/num_exps) io.write('.') if pc % 10 == 0 then io.write(' ' .. pc .. '%\n') end io.flush() end benches[perm_benches[j]](perm_sizes[j], perm_reps[j], n) end ratio_rnd = torch.cdiv(old_rnd:mean(2), new_rnd:mean(2)) ratio_srt = torch.cdiv(old_srt:mean(2), new_srt:mean(2)) ratio_cst = torch.cdiv(old_cst:mean(2), new_cst:mean(2)) local N = pow10:clone():apply(function(x) return 10^x end) if ffi.os == 'Windows' then gnuplot.setterm('windows') else gnuplot.setterm('x11') end gnuplot.figure(1) gnuplot.raw('set log x; set mxtics 10') gnuplot.raw('set grid mxtics mytics xtics ytics') gnuplot.raw('set xrange [' .. N:min() .. ':' .. N:max() .. 
']' ) gnuplot.plot({'Random - new', N, new_rnd:mean(2)}, {'Sorted - new', N, new_srt:mean(2)}, {'Constant - new', N, new_cst:mean(2)}, {'Random - old', N, old_rnd:mean(2)}, {'Sorted - old', N, old_srt:mean(2)}, {'Constant - old', N, old_cst:mean(2)}) gnuplot.xlabel('N') gnuplot.ylabel('Time (s)') gnuplot.figprint('benchmarkTime.png') gnuplot.figure(2) gnuplot.raw('set log x; set mxtics 10') gnuplot.raw('set grid mxtics mytics xtics ytics') gnuplot.raw('set xrange [' .. N:min() .. ':' .. N:max() .. ']' ) gnuplot.plot({'Random', N, ratio_rnd:mean(2)}, {'Sorted', N, ratio_srt:mean(2)}, {'Constant', N, ratio_cst:mean(2)}) gnuplot.xlabel('N') gnuplot.ylabel('Speed-up Factor (s)') gnuplot.figprint('benchmarkRatio.png') torch.save('benchmark.t7', { new_rnd=new_rnd, new_srt=new_srt, new_cst=new_cst, old_rnd=old_rnd, old_srt=old_srt, old_cst=old_cst, ratio_rnd=ratio_rnd, ratio_srt=ratio_srt, ratio_cst=ratio_cst, pow10 = pow10, num_reps = num_reps }) end main() torchcwrap.lua000066400000000000000000000457451316246254300137620ustar00rootroot00000000000000local wrap = require 'cwrap' local types = wrap.types types.Tensor = { helpname = function(arg) if arg.dim then return string.format("Tensor~%dD", arg.dim) else return "Tensor" end end, declare = function(arg) local txt = {} table.insert(txt, string.format("THTensor *arg%d = NULL;", arg.i)) if arg.returned then table.insert(txt, string.format("int arg%d_idx = 0;", arg.i)); end return table.concat(txt, '\n') end, check = function(arg, idx) if arg.dim then return string.format("(arg%d = luaT_toudata(L, %d, torch_Tensor)) && (arg%d->nDimension == %d)", arg.i, idx, arg.i, arg.dim) else return string.format("(arg%d = luaT_toudata(L, %d, torch_Tensor))", arg.i, idx) end end, read = function(arg, idx) if arg.returned then return string.format("arg%d_idx = %d;", arg.i, idx) end end, init = function(arg) if type(arg.default) == 'boolean' then return string.format('arg%d = THTensor_(new)();', arg.i) elseif type(arg.default) == 'number' 
then return string.format('arg%d = %s;', arg.i, arg.args[arg.default]:carg()) else error('unknown default tensor type value') end end, carg = function(arg) return string.format('arg%d', arg.i) end, creturn = function(arg) return string.format('arg%d', arg.i) end, precall = function(arg) local txt = {} if arg.default and arg.returned then table.insert(txt, string.format('if(arg%d_idx)', arg.i)) -- means it was passed as arg table.insert(txt, string.format('lua_pushvalue(L, arg%d_idx);', arg.i)) table.insert(txt, string.format('else')) if type(arg.default) == 'boolean' then -- boolean: we did a new() table.insert(txt, string.format('luaT_pushudata(L, arg%d, torch_Tensor);', arg.i)) else -- otherwise: point on default tensor --> retain table.insert(txt, string.format('{')) table.insert(txt, string.format('THTensor_(retain)(arg%d);', arg.i)) -- so we need a retain table.insert(txt, string.format('luaT_pushudata(L, arg%d, torch_Tensor);', arg.i)) table.insert(txt, string.format('}')) end elseif arg.default then -- we would have to deallocate the beast later if we did a new -- unlikely anyways, so i do not support it for now if type(arg.default) == 'boolean' then error('a tensor cannot be optional if not returned') end elseif arg.returned then table.insert(txt, string.format('lua_pushvalue(L, arg%d_idx);', arg.i)) end return table.concat(txt, '\n') end, postcall = function(arg) local txt = {} if arg.creturned then -- this next line is actually debatable table.insert(txt, string.format('THTensor_(retain)(arg%d);', arg.i)) table.insert(txt, string.format('luaT_pushudata(L, arg%d, torch_Tensor);', arg.i)) end return table.concat(txt, '\n') end } types.Generator = { helpname = function(arg) return "Generator" end, declare = function(arg) return string.format("THGenerator *arg%d = NULL;", arg.i) end, check = function(arg, idx) return string.format("(arg%d = luaT_toudata(L, %d, torch_Generator))", arg.i, idx) end, read = function(arg, idx) end, init = function(arg) local text 
= {} -- If no generator is supplied, pull the default out of the torch namespace. table.insert(text, 'lua_getglobal(L,"torch");') table.insert(text, string.format('arg%d = luaT_getfieldcheckudata(L, -1, "_gen", torch_Generator);', arg.i)) table.insert(text, 'lua_pop(L, 2);') return table.concat(text, '\n') end, carg = function(arg) return string.format('arg%d', arg.i) end, creturn = function(arg) return string.format('arg%d', arg.i) end, precall = function(arg) end, postcall = function(arg) end } types.IndexTensor = { helpname = function(arg) return "LongTensor" end, declare = function(arg) local txt = {} table.insert(txt, string.format("THLongTensor *arg%d = NULL;", arg.i)) if arg.returned then table.insert(txt, string.format("int arg%d_idx = 0;", arg.i)); end return table.concat(txt, '\n') end, check = function(arg, idx) return string.format('(arg%d = luaT_toudata(L, %d, "torch.LongTensor"))', arg.i, idx) end, read = function(arg, idx) local txt = {} if not arg.noreadadd then table.insert(txt, string.format("THLongTensor_add(arg%d, arg%d, -1);", arg.i, arg.i)); end if arg.returned then table.insert(txt, string.format("arg%d_idx = %d;", arg.i, idx)) end return table.concat(txt, '\n') end, init = function(arg) return string.format('arg%d = THLongTensor_new();', arg.i) end, carg = function(arg) return string.format('arg%d', arg.i) end, creturn = function(arg) return string.format('arg%d', arg.i) end, precall = function(arg) local txt = {} if arg.default and arg.returned then table.insert(txt, string.format('if(arg%d_idx)', arg.i)) -- means it was passed as arg table.insert(txt, string.format('lua_pushvalue(L, arg%d_idx);', arg.i)) table.insert(txt, string.format('else')) -- means we did a new() table.insert(txt, string.format('luaT_pushudata(L, arg%d, "torch.LongTensor");', arg.i)) elseif arg.default then error('a tensor cannot be optional if not returned') elseif arg.returned then table.insert(txt, string.format('lua_pushvalue(L, arg%d_idx);', arg.i)) end return 
table.concat(txt, '\n') end, postcall = function(arg) local txt = {} if arg.creturned or arg.returned then table.insert(txt, string.format("THLongTensor_add(arg%d, arg%d, 1);", arg.i, arg.i)); end if arg.creturned then -- this next line is actually debatable table.insert(txt, string.format('THLongTensor_retain(arg%d);', arg.i)) table.insert(txt, string.format('luaT_pushudata(L, arg%d, "torch.LongTensor");', arg.i)) end return table.concat(txt, '\n') end } for _,typename in ipairs({"ByteTensor", "CharTensor", "ShortTensor", "IntTensor", "LongTensor", "FloatTensor", "HalfTensor", "DoubleTensor"}) do types[typename] = { helpname = function(arg) if arg.dim then return string.format('%s~%dD', typename, arg.dim) else return typename end end, declare = function(arg) local txt = {} table.insert(txt, string.format("TH%s *arg%d = NULL;", typename, arg.i)) if arg.returned then table.insert(txt, string.format("int arg%d_idx = 0;", arg.i)); end return table.concat(txt, '\n') end, check = function(arg, idx) if arg.dim then return string.format('(arg%d = luaT_toudata(L, %d, "torch.%s")) && (arg%d->nDimension == %d)', arg.i, idx, typename, arg.i, arg.dim) else return string.format('(arg%d = luaT_toudata(L, %d, "torch.%s"))', arg.i, idx, typename) end end, read = function(arg, idx) if arg.returned then return string.format("arg%d_idx = %d;", arg.i, idx) end end, init = function(arg) if type(arg.default) == 'boolean' then return string.format('arg%d = TH%s_new();', arg.i, typename) elseif type(arg.default) == 'number' then return string.format('arg%d = %s;', arg.i, arg.args[arg.default]:carg()) else error('unknown default tensor type value') end end, carg = function(arg) return string.format('arg%d', arg.i) end, creturn = function(arg) return string.format('arg%d', arg.i) end, precall = function(arg) local txt = {} if arg.default and arg.returned then table.insert(txt, string.format('if(arg%d_idx)', arg.i)) -- means it was passed as arg table.insert(txt, 
string.format('lua_pushvalue(L, arg%d_idx);', arg.i)) table.insert(txt, string.format('else')) if type(arg.default) == 'boolean' then -- boolean: we did a new() table.insert(txt, string.format('luaT_pushudata(L, arg%d, "torch.%s");', arg.i, typename)) else -- otherwise: point on default tensor --> retain table.insert(txt, string.format('{')) table.insert(txt, string.format('TH%s_retain(arg%d);', typename, arg.i)) -- so we need a retain table.insert(txt, string.format('luaT_pushudata(L, arg%d, "torch.%s");', arg.i, typename)) table.insert(txt, string.format('}')) end elseif arg.default then -- we would have to deallocate the beast later if we did a new -- unlikely anyways, so i do not support it for now if type(arg.default) == 'boolean' then error('a tensor cannot be optional if not returned') end elseif arg.returned then table.insert(txt, string.format('lua_pushvalue(L, arg%d_idx);', arg.i)) end return table.concat(txt, '\n') end, postcall = function(arg) local txt = {} if arg.creturned then -- this next line is actually debatable table.insert(txt, string.format('TH%s_retain(arg%d);', typename, arg.i)) table.insert(txt, string.format('luaT_pushudata(L, arg%d, "torch.%s");', arg.i, typename)) end return table.concat(txt, '\n') end } types[typename .. 'Array'] = { helpname = function(arg) return string.format('{%s+}', typename) end, declare = function(arg) local txt = {} table.insert(txt, string.format('TH%s **arg%d_data = NULL;', typename, arg.i)) table.insert(txt, string.format('long arg%d_size = 0;', arg.i)) table.insert(txt, string.format('int arg%d_i = 0;', arg.i)) return table.concat(txt, '\n') end, check = function(arg, idx) return string.format('torch_isnonemptytable(L, %d)', idx) end, read = function(arg, idx) local txt = {} -- Iterate over the array to find its length, leave elements on stack. 
table.insert(txt, string.format('do')) table.insert(txt, string.format('{')) table.insert(txt, string.format(' arg%d_size++;', arg.i)) table.insert(txt, string.format(' lua_checkstack(L, 1);')) table.insert(txt, string.format(' lua_rawgeti(L, %d, arg%d_size);', idx, arg.i)) table.insert(txt, string.format('}')) table.insert(txt, string.format('while (!lua_isnil(L, -1));')) table.insert(txt, string.format('arg%d_size--;', arg.i)) -- Pop nil element from stack. table.insert(txt, string.format('lua_pop(L, 1);')) -- Allocate tensor pointers and read values from stack backwards. table.insert(txt, string.format('arg%d_data = (TH%s**)THAlloc(arg%d_size * sizeof(TH%s*));', arg.i, typename, arg.i, typename)) table.insert(txt, string.format('for (arg%d_i = arg%d_size - 1; arg%d_i >= 0; arg%d_i--)', arg.i, arg.i, arg.i, arg.i)) table.insert(txt, string.format('{')) table.insert(txt, string.format(' if (!(arg%d_data[arg%d_i] = luaT_toudata(L, -1, "torch.%s")))', arg.i, arg.i, typename)) table.insert(txt, string.format(' luaL_error(L, "expected %s in tensor array");', typename)) table.insert(txt, string.format(' lua_pop(L, 1);')) table.insert(txt, string.format('}')) table.insert(txt, string.format('')) return table.concat(txt, '\n') end, init = function(arg) end, carg = function(arg) return string.format('arg%d_data,arg%d_size', arg.i, arg.i) end, creturn = function(arg) error('TensorArray cannot be returned.') end, precall = function(arg) end, postcall = function(arg) return string.format('THFree(arg%d_data);', arg.i) end } end types.LongArg = { vararg = true, helpname = function(arg) return "(LongStorage | dim1 [dim2...])" end, declare = function(arg) return string.format("THLongStorage *arg%d = NULL;", arg.i) end, init = function(arg) if arg.default then error('LongArg cannot have a default value') end end, check = function(arg, idx) return string.format("torch_islongargs(L, %d)", idx) end, read = function(arg, idx) return string.format("arg%d = torch_checklongargs(L, 
%d);", arg.i, idx) end, carg = function(arg, idx) return string.format('arg%d', arg.i) end, creturn = function(arg, idx) return string.format('arg%d', arg.i) end, precall = function(arg) local txt = {} if arg.returned then table.insert(txt, string.format('luaT_pushudata(L, arg%d, "torch.LongStorage");', arg.i)) end return table.concat(txt, '\n') end, postcall = function(arg) local txt = {} if arg.creturned then -- this next line is actually debatable table.insert(txt, string.format('THLongStorage_retain(arg%d);', arg.i)) table.insert(txt, string.format('luaT_pushudata(L, arg%d, "torch.LongStorage");', arg.i)) end if not arg.returned and not arg.creturned then table.insert(txt, string.format('THLongStorage_free(arg%d);', arg.i)) end return table.concat(txt, '\n') end } types.charoption = { helpname = function(arg) if arg.values then return "(" .. table.concat(arg.values, '|') .. ")" end end, declare = function(arg) local txt = {} table.insert(txt, string.format("const char *arg%d = NULL;", arg.i)) if arg.default then table.insert(txt, string.format("char arg%d_default = '%s';", arg.i, arg.default)) end return table.concat(txt, '\n') end, init = function(arg) return string.format("arg%d = &arg%d_default;", arg.i, arg.i) end, check = function(arg, idx) local txt = {} local txtv = {} table.insert(txt, string.format('(arg%d = lua_tostring(L, %d)) && (', arg.i, idx)) for _,value in ipairs(arg.values) do table.insert(txtv, string.format("*arg%d == '%s'", arg.i, value)) end table.insert(txt, table.concat(txtv, ' || ')) table.insert(txt, ')') return table.concat(txt, '') end, read = function(arg, idx) end, carg = function(arg, idx) return string.format('arg%d', arg.i) end, creturn = function(arg, idx) end, precall = function(arg) end, postcall = function(arg) end } for _,typename in ipairs({"ptrdiff_t", "size_t"}) do types[typename] = { helpname = function(arg) return typename end, declare = function(arg) -- if it is a number we initialize here local default = 
tonumber(tostring(arg.default)) or 0 return string.format("%s arg%d = %g;", typename, arg.i, default) end, check = function(arg, idx) return string.format("lua_isnumber(L, %d)", idx) end, read = function(arg, idx) return string.format("arg%d = (%s)lua_tonumber(L, %d);", arg.i, typename, idx) end, init = function(arg) -- otherwise do it here if arg.default then local default = tostring(arg.default) if not tonumber(default) then return string.format("arg%d = %s;", arg.i, default) end end end, carg = function(arg) return string.format('arg%d', arg.i) end, creturn = function(arg) return string.format('arg%d', arg.i) end, precall = function(arg) if arg.returned then return string.format('lua_pushnumber(L, (lua_Number)arg%d);', arg.i) end end, postcall = function(arg) if arg.creturned then return string.format('lua_pushnumber(L, (lua_Number)arg%d);', arg.i) end end } end utils.c000066400000000000000000000124541316246254300123760ustar00rootroot00000000000000#include "general.h" #include "utils.h" #ifdef WIN32 # include #else # include #endif THLongStorage* torch_checklongargs(lua_State *L, int index) { THLongStorage *storage; int i; int narg = lua_gettop(L)-index+1; if(narg == 1 && luaT_toudata(L, index, "torch.LongStorage")) { THLongStorage *storagesrc = luaT_toudata(L, index, "torch.LongStorage"); storage = THLongStorage_newWithSize(storagesrc->size); THLongStorage_copy(storage, storagesrc); } else { storage = THLongStorage_newWithSize(narg); for(i = index; i < index+narg; i++) { if(!lua_isnumber(L, i)) { THLongStorage_free(storage); luaL_argerror(L, i, "number expected"); } THLongStorage_set(storage, i-index, lua_tonumber(L, i)); } } return storage; } int torch_islongargs(lua_State *L, int index) { int narg = lua_gettop(L)-index+1; if(narg == 1 && luaT_toudata(L, index, "torch.LongStorage")) { return 1; } else { int i; for(i = index; i < index+narg; i++) { if(!lua_isnumber(L, i)) return 0; } return 1; } return 0; } #ifdef _WIN32 #include #include static __declspec( 
thread ) LARGE_INTEGER ticksPerSecond = { 0 }; #endif static int torch_isatty(lua_State *L) { FILE **fp = (FILE **) luaL_checkudata(L, -1, LUA_FILEHANDLE); #ifdef _WIN32 lua_pushboolean(L, _isatty(_fileno(*fp))); #else lua_pushboolean(L, isatty(fileno(*fp))); #endif return 1; } static double real_time() { #ifdef _WIN32 if (ticksPerSecond.QuadPart == 0) { QueryPerformanceFrequency(&ticksPerSecond); } LARGE_INTEGER current; QueryPerformanceCounter(¤t); return (double)(current.QuadPart) / ticksPerSecond.QuadPart; #else struct timeval current; gettimeofday(¤t, NULL); return (current.tv_sec + current.tv_usec/1000000.0); #endif } static int torch_lua_tic(lua_State* L) { double ttime = real_time(); lua_pushnumber(L,ttime); return 1; } static int torch_lua_toc(lua_State* L) { double toctime = real_time(); lua_Number tictime = luaL_checknumber(L,1); lua_pushnumber(L,toctime-tictime); return 1; } static int torch_lua_getdefaulttensortype(lua_State *L) { const char* tname = torch_getdefaulttensortype(L); if(tname) { lua_pushstring(L, tname); return 1; } return 0; } const char* torch_getdefaulttensortype(lua_State *L) { lua_getglobal(L, "torch"); if(lua_istable(L, -1)) { lua_getfield(L, -1, "Tensor"); if(lua_istable(L, -1)) { if(lua_getmetatable(L, -1)) { lua_pushstring(L, "__index"); lua_rawget(L, -2); if(lua_istable(L, -1)) { lua_rawget(L, LUA_REGISTRYINDEX); if(lua_isstring(L, -1)) { const char *tname = lua_tostring(L, -1); lua_pop(L, 4); return tname; } } else { lua_pop(L, 4); return NULL; } } else { lua_pop(L, 2); return NULL; } } else { lua_pop(L, 2); return NULL; } } else { lua_pop(L, 1); return NULL; } return NULL; } static int torch_getnumthreads(lua_State *L) { lua_pushinteger(L, THGetNumThreads()); return 1; } static int torch_setnumthreads(lua_State *L) { THSetNumThreads(luaL_checkint(L, 1)); return 0; } static int torch_getnumcores(lua_State *L) { lua_pushinteger(L, THGetNumCores()); return 1; } static void luaTorchGCFunction(void *data) { lua_State *L = data; 
lua_gc(L, LUA_GCCOLLECT, 0); } static int torch_setheaptracking(lua_State *L) { int enabled = luaT_checkboolean(L,1); lua_getglobal(L, "torch"); lua_pushboolean(L, enabled); lua_setfield(L, -2, "_heaptracking"); if(enabled) { THSetGCHandler(luaTorchGCFunction, L); } else { THSetGCHandler(NULL, NULL); } return 0; } static void luaTorchErrorHandlerFunction(const char *msg, void *data) { lua_State *L = data; luaL_error(L, msg); } static void luaTorchArgErrorHandlerFunction(int argNumber, const char *msg, void *data) { lua_State *L = data; luaL_argcheck(L, 0, argNumber, msg); } static int torch_updateerrorhandlers(lua_State *L) { THSetErrorHandler(luaTorchErrorHandlerFunction, L); THSetArgErrorHandler(luaTorchArgErrorHandlerFunction, L); return 0; } static const struct luaL_Reg torch_utils__ [] = { {"getdefaulttensortype", torch_lua_getdefaulttensortype}, {"isatty", torch_isatty}, {"tic", torch_lua_tic}, {"toc", torch_lua_toc}, {"setnumthreads", torch_setnumthreads}, {"getnumthreads", torch_getnumthreads}, {"getnumcores", torch_getnumcores}, {"factory", luaT_lua_factory}, {"getconstructortable", luaT_lua_getconstructortable}, {"typename", luaT_lua_typename}, {"isequal", luaT_lua_isequal}, {"getenv", luaT_lua_getenv}, {"setenv", luaT_lua_setenv}, {"newmetatable", luaT_lua_newmetatable}, {"setmetatable", luaT_lua_setmetatable}, {"getmetatable", luaT_lua_getmetatable}, {"metatype", luaT_lua_metatype}, {"pushudata", luaT_lua_pushudata}, {"version", luaT_lua_version}, {"pointer", luaT_lua_pointer}, {"setheaptracking", torch_setheaptracking}, {"updateerrorhandlers", torch_updateerrorhandlers}, {NULL, NULL} }; void torch_utils_init(lua_State *L) { torch_updateerrorhandlers(L); luaT_setfuncs(L, torch_utils__, 0); } utils.h000066400000000000000000000012461316246254300124000ustar00rootroot00000000000000#ifndef TORCH_UTILS_INC #define TORCH_UTILS_INC #include "luaT.h" #include "TH.h" #include #include #ifdef _WIN32 #else #include #endif #ifdef __cplusplus # define TORCH_EXTERNC 
extern "C" #else # define TORCH_EXTERNC extern #endif #ifdef _WIN32 # ifdef torch_EXPORTS # define TORCH_API TORCH_EXTERNC __declspec(dllexport) # else # define TORCH_API TORCH_EXTERNC __declspec(dllimport) # endif #else # define TORCH_API TORCH_EXTERNC #endif TORCH_API THLongStorage* torch_checklongargs(lua_State *L, int index); TORCH_API int torch_islongargs(lua_State *L, int index); TORCH_API const char* torch_getdefaulttensortype(lua_State *L); #endif